Keras-2.2.4/0000755000000000116100000000000013355226624012331 5ustar rooteng00000000000000Keras-2.2.4/PKG-INFO0000644000000000116100000000326313355226624013432 0ustar rooteng00000000000000Metadata-Version: 2.1 Name: Keras Version: 2.2.4 Summary: Deep Learning for humans Home-page: https://github.com/keras-team/keras Author: Francois Chollet Author-email: francois.chollet@gmail.com License: MIT Download-URL: https://github.com/keras-team/keras/tarball/2.2.4 Description: Keras is a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. Use Keras if you need a deep learning library that: - Allows for easy and fast prototyping (through user friendliness, modularity, and extensibility). - Supports both convolutional networks and recurrent networks, as well as combinations of the two. - Runs seamlessly on CPU and GPU. Read the documentation at: https://keras.io/ For a detailed overview of what makes Keras special, see: https://keras.io/why-use-keras/ Keras is compatible with Python 2.7-3.6 and is distributed under the MIT license. 
Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: Intended Audience :: Education Classifier: Intended Audience :: Science/Research Classifier: License :: OSI Approved :: MIT License Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.6 Classifier: Topic :: Software Development :: Libraries Classifier: Topic :: Software Development :: Libraries :: Python Modules Provides-Extra: tests Provides-Extra: visualize Keras-2.2.4/keras/0000755000000000116100000000000013355226624013436 5ustar rooteng00000000000000Keras-2.2.4/keras/wrappers/0000755000000000116100000000000013355226624015301 5ustar rooteng00000000000000Keras-2.2.4/keras/wrappers/__init__.py0000644000000000116100000000000013146670577017410 0ustar rooteng00000000000000Keras-2.2.4/keras/wrappers/scikit_learn.py0000644000000000116100000003155113326715636020333 0ustar rooteng00000000000000"""Wrapper for using the Scikit-Learn API with Keras models. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy import types import numpy as np from ..utils.np_utils import to_categorical from ..utils.generic_utils import has_arg from ..utils.generic_utils import to_list from ..models import Sequential class BaseWrapper(object): """Base class for the Keras scikit-learn wrapper. Warning: This class should not be used directly. Use descendant classes instead. # Arguments build_fn: callable function or class instance **sk_params: model parameters & fitting parameters The `build_fn` should construct, compile and return a Keras model, which will then be used to fit/predict. One of the following three values could be passed to `build_fn`: 1. A function 2. An instance of a class that implements the `__call__` method 3. None. 
This means you implement a class that inherits from either `KerasClassifier` or `KerasRegressor`. The `__call__` method of the present class will then be treated as the default `build_fn`. `sk_params` takes both model parameters and fitting parameters. Legal model parameters are the arguments of `build_fn`. Note that like all other estimators in scikit-learn, `build_fn` should provide default values for its arguments, so that you could create the estimator without passing any values to `sk_params`. `sk_params` could also accept parameters for calling `fit`, `predict`, `predict_proba`, and `score` methods (e.g., `epochs`, `batch_size`). fitting (predicting) parameters are selected in the following order: 1. Values passed to the dictionary arguments of `fit`, `predict`, `predict_proba`, and `score` methods 2. Values passed to `sk_params` 3. The default values of the `keras.models.Sequential` `fit`, `predict`, `predict_proba` and `score` methods When using scikit-learn's `grid_search` API, legal tunable parameters are those you could pass to `sk_params`, including fitting parameters. In other words, you could use `grid_search` to search for the best `batch_size` or `epochs` as well as the model parameters. """ def __init__(self, build_fn=None, **sk_params): self.build_fn = build_fn self.sk_params = sk_params self.check_params(sk_params) def check_params(self, params): """Checks for user typos in `params`. # Arguments params: dictionary; the parameters to be checked # Raises ValueError: if any member of `params` is not a valid argument. 
""" legal_params_fns = [Sequential.fit, Sequential.predict, Sequential.predict_classes, Sequential.evaluate] if self.build_fn is None: legal_params_fns.append(self.__call__) elif (not isinstance(self.build_fn, types.FunctionType) and not isinstance(self.build_fn, types.MethodType)): legal_params_fns.append(self.build_fn.__call__) else: legal_params_fns.append(self.build_fn) for params_name in params: for fn in legal_params_fns: if has_arg(fn, params_name): break else: if params_name != 'nb_epoch': raise ValueError( '{} is not a legal parameter'.format(params_name)) def get_params(self, **params): """Gets parameters for this estimator. # Arguments **params: ignored (exists for API compatibility). # Returns Dictionary of parameter names mapped to their values. """ res = copy.deepcopy(self.sk_params) res.update({'build_fn': self.build_fn}) return res def set_params(self, **params): """Sets the parameters of this estimator. # Arguments **params: Dictionary of parameter names mapped to their values. # Returns self """ self.check_params(params) self.sk_params.update(params) return self def fit(self, x, y, **kwargs): """Constructs a new model with `build_fn` & fit the model to `(x, y)`. # Arguments x : array-like, shape `(n_samples, n_features)` Training samples where `n_samples` is the number of samples and `n_features` is the number of features. y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)` True labels for `x`. **kwargs: dictionary arguments Legal arguments are the arguments of `Sequential.fit` # Returns history : object details about the training history at each epoch. 
""" if self.build_fn is None: self.model = self.__call__(**self.filter_sk_params(self.__call__)) elif (not isinstance(self.build_fn, types.FunctionType) and not isinstance(self.build_fn, types.MethodType)): self.model = self.build_fn( **self.filter_sk_params(self.build_fn.__call__)) else: self.model = self.build_fn(**self.filter_sk_params(self.build_fn)) loss_name = self.model.loss if hasattr(loss_name, '__name__'): loss_name = loss_name.__name__ if loss_name == 'categorical_crossentropy' and len(y.shape) != 2: y = to_categorical(y) fit_args = copy.deepcopy(self.filter_sk_params(Sequential.fit)) fit_args.update(kwargs) history = self.model.fit(x, y, **fit_args) return history def filter_sk_params(self, fn, override=None): """Filters `sk_params` and returns those in `fn`'s arguments. # Arguments fn : arbitrary function override: dictionary, values to override `sk_params` # Returns res : dictionary containing variables in both `sk_params` and `fn`'s arguments. """ override = override or {} res = {} for name, value in self.sk_params.items(): if has_arg(fn, name): res.update({name: value}) res.update(override) return res class KerasClassifier(BaseWrapper): """Implementation of the scikit-learn classifier API for Keras. """ def fit(self, x, y, sample_weight=None, **kwargs): """Constructs a new model with `build_fn` & fit the model to `(x, y)`. # Arguments x : array-like, shape `(n_samples, n_features)` Training samples where `n_samples` is the number of samples and `n_features` is the number of features. y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)` True labels for `x`. **kwargs: dictionary arguments Legal arguments are the arguments of `Sequential.fit` # Returns history : object details about the training history at each epoch. # Raises ValueError: In case of invalid shape for `y` argument. 
""" y = np.array(y) if len(y.shape) == 2 and y.shape[1] > 1: self.classes_ = np.arange(y.shape[1]) elif (len(y.shape) == 2 and y.shape[1] == 1) or len(y.shape) == 1: self.classes_ = np.unique(y) y = np.searchsorted(self.classes_, y) else: raise ValueError('Invalid shape for y: ' + str(y.shape)) self.n_classes_ = len(self.classes_) if sample_weight is not None: kwargs['sample_weight'] = sample_weight return super(KerasClassifier, self).fit(x, y, **kwargs) def predict(self, x, **kwargs): """Returns the class predictions for the given test data. # Arguments x: array-like, shape `(n_samples, n_features)` Test samples where `n_samples` is the number of samples and `n_features` is the number of features. **kwargs: dictionary arguments Legal arguments are the arguments of `Sequential.predict_classes`. # Returns preds: array-like, shape `(n_samples,)` Class predictions. """ kwargs = self.filter_sk_params(Sequential.predict_classes, kwargs) proba = self.model.predict(x, **kwargs) if proba.shape[-1] > 1: classes = proba.argmax(axis=-1) else: classes = (proba > 0.5).astype('int32') return self.classes_[classes] def predict_proba(self, x, **kwargs): """Returns class probability estimates for the given test data. # Arguments x: array-like, shape `(n_samples, n_features)` Test samples where `n_samples` is the number of samples and `n_features` is the number of features. **kwargs: dictionary arguments Legal arguments are the arguments of `Sequential.predict_classes`. # Returns proba: array-like, shape `(n_samples, n_outputs)` Class probability estimates. In the case of binary classification, to match the scikit-learn API, will return an array of shape `(n_samples, 2)` (instead of `(n_sample, 1)` as in Keras). 
""" kwargs = self.filter_sk_params(Sequential.predict_proba, kwargs) probs = self.model.predict(x, **kwargs) # check if binary classification if probs.shape[1] == 1: # first column is probability of class 0 and second is of class 1 probs = np.hstack([1 - probs, probs]) return probs def score(self, x, y, **kwargs): """Returns the mean accuracy on the given test data and labels. # Arguments x: array-like, shape `(n_samples, n_features)` Test samples where `n_samples` is the number of samples and `n_features` is the number of features. y: array-like, shape `(n_samples,)` or `(n_samples, n_outputs)` True labels for `x`. **kwargs: dictionary arguments Legal arguments are the arguments of `Sequential.evaluate`. # Returns score: float Mean accuracy of predictions on `x` wrt. `y`. # Raises ValueError: If the underlying model isn't configured to compute accuracy. You should pass `metrics=["accuracy"]` to the `.compile()` method of the model. """ y = np.searchsorted(self.classes_, y) kwargs = self.filter_sk_params(Sequential.evaluate, kwargs) loss_name = self.model.loss if hasattr(loss_name, '__name__'): loss_name = loss_name.__name__ if loss_name == 'categorical_crossentropy' and len(y.shape) != 2: y = to_categorical(y) outputs = self.model.evaluate(x, y, **kwargs) outputs = to_list(outputs) for name, output in zip(self.model.metrics_names, outputs): if name == 'acc': return output raise ValueError('The model is not configured to compute accuracy. ' 'You should pass `metrics=["accuracy"]` to ' 'the `model.compile()` method.') class KerasRegressor(BaseWrapper): """Implementation of the scikit-learn regressor API for Keras. """ def predict(self, x, **kwargs): """Returns predictions for the given test data. # Arguments x: array-like, shape `(n_samples, n_features)` Test samples where `n_samples` is the number of samples and `n_features` is the number of features. **kwargs: dictionary arguments Legal arguments are the arguments of `Sequential.predict`. 
# Returns preds: array-like, shape `(n_samples,)` Predictions. """ kwargs = self.filter_sk_params(Sequential.predict, kwargs) return np.squeeze(self.model.predict(x, **kwargs)) def score(self, x, y, **kwargs): """Returns the mean loss on the given test data and labels. # Arguments x: array-like, shape `(n_samples, n_features)` Test samples where `n_samples` is the number of samples and `n_features` is the number of features. y: array-like, shape `(n_samples,)` True labels for `x`. **kwargs: dictionary arguments Legal arguments are the arguments of `Sequential.evaluate`. # Returns score: float Mean accuracy of predictions on `x` wrt. `y`. """ kwargs = self.filter_sk_params(Sequential.evaluate, kwargs) loss = self.model.evaluate(x, y, **kwargs) if isinstance(loss, list): return -loss[0] return -loss Keras-2.2.4/keras/metrics.py0000644000000000116100000000526413354530144015457 0ustar rooteng00000000000000"""Built-in metrics. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import six from . 
import backend as K from .losses import mean_squared_error from .losses import mean_absolute_error from .losses import mean_absolute_percentage_error from .losses import mean_squared_logarithmic_error from .losses import hinge from .losses import logcosh from .losses import squared_hinge from .losses import categorical_crossentropy from .losses import sparse_categorical_crossentropy from .losses import binary_crossentropy from .losses import kullback_leibler_divergence from .losses import poisson from .losses import cosine_proximity from .utils.generic_utils import deserialize_keras_object from .utils.generic_utils import serialize_keras_object def binary_accuracy(y_true, y_pred): return K.mean(K.equal(y_true, K.round(y_pred)), axis=-1) def categorical_accuracy(y_true, y_pred): return K.cast(K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx()) def sparse_categorical_accuracy(y_true, y_pred): # flatten y_true in case it's in shape (num_samples, 1) instead of (num_samples,) return K.cast(K.equal(K.flatten(y_true), K.cast(K.argmax(y_pred, axis=-1), K.floatx())), K.floatx()) def top_k_categorical_accuracy(y_true, y_pred, k=5): return K.mean(K.in_top_k(y_pred, K.argmax(y_true, axis=-1), k), axis=-1) def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5): # If the shape of y_true is (num_samples, 1), flatten to (num_samples,) return K.mean(K.in_top_k(y_pred, K.cast(K.flatten(y_true), 'int32'), k), axis=-1) # Aliases mse = MSE = mean_squared_error mae = MAE = mean_absolute_error mape = MAPE = mean_absolute_percentage_error msle = MSLE = mean_squared_logarithmic_error cosine = cosine_proximity def serialize(metric): return serialize_keras_object(metric) def deserialize(config, custom_objects=None): return deserialize_keras_object(config, module_objects=globals(), custom_objects=custom_objects, printable_module_name='metric function') def get(identifier): if isinstance(identifier, dict): config = {'class_name': str(identifier), 'config': {}} return 
deserialize(config) elif isinstance(identifier, six.string_types): return deserialize(str(identifier)) elif callable(identifier): return identifier else: raise ValueError('Could not interpret ' 'metric function identifier:', identifier) Keras-2.2.4/keras/layers/0000755000000000116100000000000013355226624014735 5ustar rooteng00000000000000Keras-2.2.4/keras/layers/merge.py0000644000000000116100000005674613354530144016422 0ustar rooteng00000000000000"""Layers that can merge several inputs into one. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from ..engine.base_layer import Layer from .. import backend as K class _Merge(Layer): """Generic merge layer for elementwise merge functions. Used to implement `Sum`, `Average`, etc. # Arguments **kwargs: standard layer keyword arguments. """ def __init__(self, **kwargs): super(_Merge, self).__init__(**kwargs) self.supports_masking = True def _merge_function(self, inputs): raise NotImplementedError def _compute_elemwise_op_output_shape(self, shape1, shape2): """Computes the shape of the resultant of an elementwise operation. # Arguments shape1: tuple or None. Shape of the first tensor shape2: tuple or None. Shape of the second tensor # Returns expected output shape when an element-wise operation is carried out on 2 tensors with shapes shape1 and shape2. tuple or None. # Raises ValueError: if shape1 and shape2 are not compatible for element-wise operations. 
""" if None in [shape1, shape2]: return None elif len(shape1) < len(shape2): return self._compute_elemwise_op_output_shape(shape2, shape1) elif not shape2: return shape1 output_shape = list(shape1[:-len(shape2)]) for i, j in zip(shape1[-len(shape2):], shape2): if i is None or j is None: output_shape.append(None) elif i == 1: output_shape.append(j) elif j == 1: output_shape.append(i) else: if i != j: raise ValueError('Operands could not be broadcast ' 'together with shapes ' + str(shape1) + ' ' + str(shape2)) output_shape.append(i) return tuple(output_shape) def build(self, input_shape): # Used purely for shape validation. if not isinstance(input_shape, list): raise ValueError('A merge layer should be called ' 'on a list of inputs.') if len(input_shape) < 2: raise ValueError('A merge layer should be called ' 'on a list of at least 2 inputs. ' 'Got ' + str(len(input_shape)) + ' inputs.') batch_sizes = [s[0] for s in input_shape if s is not None] batch_sizes = set(batch_sizes) batch_sizes -= set([None]) if len(batch_sizes) > 1: raise ValueError('Can not merge tensors with different ' 'batch sizes. Got tensors with shapes : ' + str(input_shape)) if input_shape[0] is None: output_shape = None else: output_shape = input_shape[0][1:] for i in range(1, len(input_shape)): if input_shape[i] is None: shape = None else: shape = input_shape[i][1:] output_shape = self._compute_elemwise_op_output_shape(output_shape, shape) # If the inputs have different ranks, we have to reshape them # to make them broadcastable. 
if None not in input_shape and len(set(map(len, input_shape))) == 1: self._reshape_required = False else: self._reshape_required = True def call(self, inputs): if not isinstance(inputs, list): raise ValueError('A merge layer should be called ' 'on a list of inputs.') if self._reshape_required: reshaped_inputs = [] input_ndims = list(map(K.ndim, inputs)) if None not in input_ndims: # If ranks of all inputs are available, # we simply expand each of them at axis=1 # until all of them have the same rank. max_ndim = max(input_ndims) for x in inputs: x_ndim = K.ndim(x) for _ in range(max_ndim - x_ndim): x = K.expand_dims(x, 1) reshaped_inputs.append(x) return self._merge_function(reshaped_inputs) else: # Transpose all inputs so that batch size is the last dimension. # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size) transposed = False for x in inputs: x_ndim = K.ndim(x) if x_ndim is None: x_shape = K.shape(x) batch_size = x_shape[0] new_shape = K.concatenate([x_shape[1:], K.expand_dims(batch_size)]) x_transposed = K.reshape(x, K.stack([batch_size, K.prod(x_shape[1:])])) x_transposed = K.permute_dimensions(x_transposed, (1, 0)) x_transposed = K.reshape(x_transposed, new_shape) reshaped_inputs.append(x_transposed) transposed = True elif x_ndim > 1: dims = list(range(1, x_ndim)) + [0] reshaped_inputs.append(K.permute_dimensions(x, dims)) transposed = True else: # We don't transpose inputs if they are # 1D vectors or scalars. reshaped_inputs.append(x) y = self._merge_function(reshaped_inputs) y_ndim = K.ndim(y) if transposed: # If inputs have been transposed, # we have to transpose the output too. 
if y_ndim is None: y_shape = K.shape(y) y_ndim = K.shape(y_shape)[0] batch_size = y_shape[y_ndim - 1] new_shape = K.concatenate([K.expand_dims(batch_size), y_shape[:y_ndim - 1]]) y = K.reshape(y, (-1, batch_size)) y = K.permute_dimensions(y, (1, 0)) y = K.reshape(y, new_shape) elif y_ndim > 1: dims = [y_ndim - 1] + list(range(y_ndim - 1)) y = K.permute_dimensions(y, dims) return y else: return self._merge_function(inputs) def compute_output_shape(self, input_shape): if input_shape[0] is None: output_shape = None else: output_shape = input_shape[0][1:] for i in range(1, len(input_shape)): if input_shape[i] is None: shape = None else: shape = input_shape[i][1:] output_shape = self._compute_elemwise_op_output_shape(output_shape, shape) batch_sizes = [s[0] for s in input_shape if s is not None] batch_sizes = set(batch_sizes) batch_sizes -= set([None]) if len(batch_sizes) == 1: output_shape = (list(batch_sizes)[0],) + output_shape else: output_shape = (None,) + output_shape return output_shape def compute_mask(self, inputs, mask=None): if mask is None: return None if not isinstance(mask, list): raise ValueError('`mask` should be a list.') if not isinstance(inputs, list): raise ValueError('`inputs` should be a list.') if len(mask) != len(inputs): raise ValueError('The lists `inputs` and `mask` ' 'should have the same length.') if all([m is None for m in mask]): return None masks = [K.expand_dims(m, 0) for m in mask if m is not None] return K.all(K.concatenate(masks, axis=0), axis=0, keepdims=False) class Add(_Merge): """Layer that adds a list of inputs. It takes as input a list of tensors, all of the same shape, and returns a single tensor (also of the same shape). 
# Examples ```python import keras input1 = keras.layers.Input(shape=(16,)) x1 = keras.layers.Dense(8, activation='relu')(input1) input2 = keras.layers.Input(shape=(32,)) x2 = keras.layers.Dense(8, activation='relu')(input2) # equivalent to added = keras.layers.add([x1, x2]) added = keras.layers.Add()([x1, x2]) out = keras.layers.Dense(4)(added) model = keras.models.Model(inputs=[input1, input2], outputs=out) ``` """ def _merge_function(self, inputs): output = inputs[0] for i in range(1, len(inputs)): output += inputs[i] return output class Subtract(_Merge): """Layer that subtracts two inputs. It takes as input a list of tensors of size 2, both of the same shape, and returns a single tensor, (inputs[0] - inputs[1]), also of the same shape. # Examples ```python import keras input1 = keras.layers.Input(shape=(16,)) x1 = keras.layers.Dense(8, activation='relu')(input1) input2 = keras.layers.Input(shape=(32,)) x2 = keras.layers.Dense(8, activation='relu')(input2) # Equivalent to subtracted = keras.layers.subtract([x1, x2]) subtracted = keras.layers.Subtract()([x1, x2]) out = keras.layers.Dense(4)(subtracted) model = keras.models.Model(inputs=[input1, input2], outputs=out) ``` """ def build(self, input_shape): super(Subtract, self).build(input_shape) if len(input_shape) != 2: raise ValueError('A `Subtract` layer should be called ' 'on exactly 2 inputs') def _merge_function(self, inputs): if len(inputs) != 2: raise ValueError('A `Subtract` layer should be called ' 'on exactly 2 inputs') return inputs[0] - inputs[1] class Multiply(_Merge): """Layer that multiplies (element-wise) a list of inputs. It takes as input a list of tensors, all of the same shape, and returns a single tensor (also of the same shape). """ def _merge_function(self, inputs): output = inputs[0] for i in range(1, len(inputs)): output *= inputs[i] return output class Average(_Merge): """Layer that averages a list of inputs. 
It takes as input a list of tensors, all of the same shape, and returns a single tensor (also of the same shape). """ def _merge_function(self, inputs): output = inputs[0] for i in range(1, len(inputs)): output += inputs[i] return output / len(inputs) class Maximum(_Merge): """Layer that computes the maximum (element-wise) a list of inputs. It takes as input a list of tensors, all of the same shape, and returns a single tensor (also of the same shape). """ def _merge_function(self, inputs): output = inputs[0] for i in range(1, len(inputs)): output = K.maximum(output, inputs[i]) return output class Minimum(_Merge): """Layer that computes the minimum (element-wise) a list of inputs. It takes as input a list of tensors, all of the same shape, and returns a single tensor (also of the same shape). """ def _merge_function(self, inputs): output = inputs[0] for i in range(1, len(inputs)): output = K.minimum(output, inputs[i]) return output class Concatenate(_Merge): """Layer that concatenates a list of inputs. It takes as input a list of tensors, all of the same shape except for the concatenation axis, and returns a single tensor, the concatenation of all inputs. # Arguments axis: Axis along which to concatenate. **kwargs: standard layer keyword arguments. """ def __init__(self, axis=-1, **kwargs): super(Concatenate, self).__init__(**kwargs) self.axis = axis self.supports_masking = True self._reshape_required = False def build(self, input_shape): # Used purely for shape validation. 
if not isinstance(input_shape, list) or len(input_shape) < 2: raise ValueError('A `Concatenate` layer should be called ' 'on a list of at least 2 inputs') if all([shape is None for shape in input_shape]): return reduced_inputs_shapes = [list(shape) for shape in input_shape] shape_set = set() for i in range(len(reduced_inputs_shapes)): del reduced_inputs_shapes[i][self.axis] shape_set.add(tuple(reduced_inputs_shapes[i])) if len(shape_set) > 1: raise ValueError('A `Concatenate` layer requires ' 'inputs with matching shapes ' 'except for the concat axis. ' 'Got inputs shapes: %s' % (input_shape)) def _merge_function(self, inputs): return K.concatenate(inputs, axis=self.axis) def compute_output_shape(self, input_shape): if not isinstance(input_shape, list): raise ValueError('A `Concatenate` layer should be called ' 'on a list of inputs.') input_shapes = input_shape output_shape = list(input_shapes[0]) for shape in input_shapes[1:]: if output_shape[self.axis] is None or shape[self.axis] is None: output_shape[self.axis] = None break output_shape[self.axis] += shape[self.axis] return tuple(output_shape) def compute_mask(self, inputs, mask=None): if mask is None: return None if not isinstance(mask, list): raise ValueError('`mask` should be a list.') if not isinstance(inputs, list): raise ValueError('`inputs` should be a list.') if len(mask) != len(inputs): raise ValueError('The lists `inputs` and `mask` ' 'should have the same length.') if all([m is None for m in mask]): return None # Make a list of masks while making sure # the dimensionality of each mask # is the same as the corresponding input. masks = [] for input_i, mask_i in zip(inputs, mask): if mask_i is None: # Input is unmasked. 
Append all 1s to masks, masks.append(K.ones_like(input_i, dtype='bool')) elif K.ndim(mask_i) < K.ndim(input_i): # Mask is smaller than the input, expand it masks.append(K.expand_dims(mask_i)) else: masks.append(mask_i) concatenated = K.concatenate(masks, axis=self.axis) return K.all(concatenated, axis=-1, keepdims=False) def get_config(self): config = { 'axis': self.axis, } base_config = super(Concatenate, self).get_config() return dict(list(base_config.items()) + list(config.items())) class Dot(_Merge): """Layer that computes a dot product between samples in two tensors. E.g. if applied to a list of two tensors `a` and `b` of shape `(batch_size, n)`, the output will be a tensor of shape `(batch_size, 1)` where each entry `i` will be the dot product between `a[i]` and `b[i]`. # Arguments axes: Integer or tuple of integers, axis or axes along which to take the dot product. normalize: Whether to L2-normalize samples along the dot product axis before taking the dot product. If set to True, then the output of the dot product is the cosine proximity between the two samples. **kwargs: Standard layer keyword arguments. """ def __init__(self, axes, normalize=False, **kwargs): super(Dot, self).__init__(**kwargs) if not isinstance(axes, int): if not isinstance(axes, (list, tuple)): raise TypeError('Invalid type for `axes` - ' 'should be a list or an int.') if len(axes) != 2: raise ValueError('Invalid format for `axes` - ' 'should contain two elements.') if not isinstance(axes[0], int) or not isinstance(axes[1], int): raise ValueError('Invalid format for `axes` - ' 'list elements should be "int".') self.axes = axes self.normalize = normalize self.supports_masking = True self._reshape_required = False def build(self, input_shape): # Used purely for shape validation. 
if not isinstance(input_shape, list) or len(input_shape) != 2: raise ValueError('A `Dot` layer should be called ' 'on a list of 2 inputs.') shape1 = input_shape[0] shape2 = input_shape[1] if shape1 is None or shape2 is None: return if isinstance(self.axes, int): if self.axes < 0: axes = [self.axes % len(shape1), self.axes % len(shape2)] else: axes = [self.axes] * 2 else: axes = self.axes if shape1[axes[0]] != shape2[axes[1]]: raise ValueError( 'Dimension incompatibility ' '%s != %s. ' % (shape1[axes[0]], shape2[axes[1]]) + 'Layer shapes: %s, %s' % (shape1, shape2)) def _merge_function(self, inputs): if len(inputs) != 2: raise ValueError('A `Dot` layer should be called ' 'on exactly 2 inputs') x1 = inputs[0] x2 = inputs[1] if isinstance(self.axes, int): if self.axes < 0: axes = [self.axes % K.ndim(x1), self.axes % K.ndim(x2)] else: axes = [self.axes] * 2 else: axes = [] for i in range(len(self.axes)): if self.axes[i] < 0: axes.append(self.axes[i] % K.ndim(inputs[i])) else: axes.append(self.axes[i]) if self.normalize: x1 = K.l2_normalize(x1, axis=axes[0]) x2 = K.l2_normalize(x2, axis=axes[1]) output = K.batch_dot(x1, x2, axes) return output def compute_output_shape(self, input_shape): if not isinstance(input_shape, list) or len(input_shape) != 2: raise ValueError('A `Dot` layer should be called ' 'on a list of 2 inputs.') shape1 = list(input_shape[0]) shape2 = list(input_shape[1]) if isinstance(self.axes, int): if self.axes < 0: axes = [self.axes % len(shape1), self.axes % len(shape2)] else: axes = [self.axes] * 2 else: axes = self.axes shape1.pop(axes[0]) shape2.pop(axes[1]) shape2.pop(0) output_shape = shape1 + shape2 if len(output_shape) == 1: output_shape += [1] return tuple(output_shape) def compute_mask(self, inputs, mask=None): return None def get_config(self): config = { 'axes': self.axes, 'normalize': self.normalize, } base_config = super(Dot, self).get_config() return dict(list(base_config.items()) + list(config.items())) def add(inputs, **kwargs): 
"""Functional interface to the `Add` layer. # Arguments inputs: A list of input tensors (at least 2). **kwargs: Standard layer keyword arguments. # Returns A tensor, the sum of the inputs. # Examples ```python import keras input1 = keras.layers.Input(shape=(16,)) x1 = keras.layers.Dense(8, activation='relu')(input1) input2 = keras.layers.Input(shape=(32,)) x2 = keras.layers.Dense(8, activation='relu')(input2) added = keras.layers.add([x1, x2]) out = keras.layers.Dense(4)(added) model = keras.models.Model(inputs=[input1, input2], outputs=out) ``` """ return Add(**kwargs)(inputs) def subtract(inputs, **kwargs): """Functional interface to the `Subtract` layer. # Arguments inputs: A list of input tensors (exactly 2). **kwargs: Standard layer keyword arguments. # Returns A tensor, the difference of the inputs. # Examples ```python import keras input1 = keras.layers.Input(shape=(16,)) x1 = keras.layers.Dense(8, activation='relu')(input1) input2 = keras.layers.Input(shape=(32,)) x2 = keras.layers.Dense(8, activation='relu')(input2) subtracted = keras.layers.subtract([x1, x2]) out = keras.layers.Dense(4)(subtracted) model = keras.models.Model(inputs=[input1, input2], outputs=out) ``` """ return Subtract(**kwargs)(inputs) def multiply(inputs, **kwargs): """Functional interface to the `Multiply` layer. # Arguments inputs: A list of input tensors (at least 2). **kwargs: Standard layer keyword arguments. # Returns A tensor, the element-wise product of the inputs. """ return Multiply(**kwargs)(inputs) def average(inputs, **kwargs): """Functional interface to the `Average` layer. # Arguments inputs: A list of input tensors (at least 2). **kwargs: Standard layer keyword arguments. # Returns A tensor, the average of the inputs. """ return Average(**kwargs)(inputs) def maximum(inputs, **kwargs): """Functional interface to the `Maximum` layer. # Arguments inputs: A list of input tensors (at least 2). **kwargs: Standard layer keyword arguments. 
# Returns A tensor, the element-wise maximum of the inputs. """ return Maximum(**kwargs)(inputs) def minimum(inputs, **kwargs): """Functional interface to the `Minimum` layer. # Arguments inputs: A list of input tensors (at least 2). **kwargs: Standard layer keyword arguments. # Returns A tensor, the element-wise minimum of the inputs. """ return Minimum(**kwargs)(inputs) def concatenate(inputs, axis=-1, **kwargs): """Functional interface to the `Concatenate` layer. # Arguments inputs: A list of input tensors (at least 2). axis: Concatenation axis. **kwargs: Standard layer keyword arguments. # Returns A tensor, the concatenation of the inputs alongside axis `axis`. """ return Concatenate(axis=axis, **kwargs)(inputs) def dot(inputs, axes, normalize=False, **kwargs): """Functional interface to the `Dot` layer. # Arguments inputs: A list of input tensors (at least 2). axes: Integer or tuple of integers, axis or axes along which to take the dot product. normalize: Whether to L2-normalize samples along the dot product axis before taking the dot product. If set to True, then the output of the dot product is the cosine proximity between the two samples. **kwargs: Standard layer keyword arguments. # Returns A tensor, the dot product of the samples from the inputs. """ return Dot(axes=axes, normalize=normalize, **kwargs)(inputs) Keras-2.2.4/keras/layers/convolutional.py0000644000000000116100000034116313354530144020205 0ustar rooteng00000000000000# -*- coding: utf-8 -*- """Convolutional layers. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from .. import backend as K from .. import activations from .. import initializers from .. import regularizers from .. 
import constraints from ..engine.base_layer import Layer from ..engine.base_layer import InputSpec from ..utils import conv_utils from ..utils.generic_utils import transpose_shape from ..legacy import interfaces # imports for backwards namespace compatibility from .pooling import AveragePooling1D from .pooling import AveragePooling2D from .pooling import AveragePooling3D from .pooling import MaxPooling1D from .pooling import MaxPooling2D from .pooling import MaxPooling3D from ..legacy.layers import AtrousConvolution1D from ..legacy.layers import AtrousConvolution2D class _Conv(Layer): """Abstract nD convolution layer (private, used as implementation base). This layer creates a convolution kernel that is convolved with the layer input to produce a tensor of outputs. If `use_bias` is True, a bias vector is created and added to the outputs. Finally, if `activation` is not `None`, it is applied to the outputs as well. # Arguments rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution. filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of n integers, specifying the dimensions of the convolution window. strides: An integer or tuple/list of n integers, specifying the strides of the convolution. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, ..., channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, ...)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". 
dilation_rate: An integer or tuple/list of n integers, specifying the dilation rate to use for dilated convolution. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any `strides` value != 1. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the kernel matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). 
def __init__(self, rank,
             filters,
             kernel_size,
             strides=1,
             padding='valid',
             data_format=None,
             dilation_rate=1,
             activation=None,
             use_bias=True,
             kernel_initializer='glorot_uniform',
             bias_initializer='zeros',
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             **kwargs):
    """Store the convolution hyper-parameters in normalized form.

    `kernel_size`, `strides` and `dilation_rate` are passed through
    `conv_utils.normalize_tuple` with length `rank`. The activation,
    initializers, regularizers and constraints are passed through the
    `get` function of their respective modules. Any remaining
    `**kwargs` are forwarded to the parent layer constructor.
    """
    super(_Conv, self).__init__(**kwargs)
    self.rank = rank
    self.filters = filters

    as_tuple = conv_utils.normalize_tuple
    self.kernel_size = as_tuple(kernel_size, rank, 'kernel_size')
    self.strides = as_tuple(strides, rank, 'strides')
    self.padding = conv_utils.normalize_padding(padding)
    self.data_format = K.normalize_data_format(data_format)
    self.dilation_rate = as_tuple(dilation_rate, rank, 'dilation_rate')

    self.activation = activations.get(activation)
    self.use_bias = use_bias
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)

    # `rank` convolution axes plus two more (batch and channels).
    self.input_spec = InputSpec(ndim=self.rank + 2)


def build(self, input_shape):
    """Create the kernel (and optional bias) weights.

    # Arguments
        input_shape: Shape tuple of the layer input; the entry on the
            channel axis must not be `None`.

    # Raises
        ValueError: if the channel dimension of `input_shape` is `None`.
    """
    channel_axis = 1 if self.data_format == 'channels_first' else -1
    input_dim = input_shape[channel_axis]
    if input_dim is None:
        raise ValueError('The channel dimension of the inputs '
                         'should be defined. Found `None`.')

    # Kernel layout: convolution window, then input channels, then filters.
    self.kernel = self.add_weight(
        shape=self.kernel_size + (input_dim, self.filters),
        initializer=self.kernel_initializer,
        name='kernel',
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint)

    if not self.use_bias:
        self.bias = None
    else:
        self.bias = self.add_weight(
            shape=(self.filters,),
            initializer=self.bias_initializer,
            name='bias',
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint)

    # Now that the channel count is known, pin it in the input spec.
    self.input_spec = InputSpec(ndim=self.rank + 2,
                                axes={channel_axis: input_dim})
    self.built = True


def call(self, inputs):
    """Convolve `inputs` with the kernel, then apply bias and activation.

    The backend op is selected by `self.rank` (1, 2 or 3).
    """
    if self.rank == 1:
        # The 1D op receives scalar stride and dilation values.
        outputs = K.conv1d(
            inputs,
            self.kernel,
            strides=self.strides[0],
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate[0])
    elif self.rank == 2:
        outputs = K.conv2d(
            inputs,
            self.kernel,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate)
    elif self.rank == 3:
        outputs = K.conv3d(
            inputs,
            self.kernel,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate)

    if self.use_bias:
        outputs = K.bias_add(
            outputs,
            self.bias,
            data_format=self.data_format)

    if self.activation is None:
        return outputs
    return self.activation(outputs)


def compute_output_shape(self, input_shape):
    """Return the output shape tuple for a given `input_shape`.

    Each spatial dimension is mapped through
    `conv_utils.conv_output_length`; the channel entry becomes
    `self.filters` and the batch entry is copied unchanged.
    """
    channels_last = self.data_format == 'channels_last'
    if channels_last:
        spatial = input_shape[1:-1]
    elif self.data_format == 'channels_first':
        spatial = input_shape[2:]
    else:
        # Neither known layout: no shape is produced.
        return None

    new_space = tuple(
        conv_utils.conv_output_length(
            dim,
            self.kernel_size[axis],
            padding=self.padding,
            stride=self.strides[axis],
            dilation=self.dilation_rate[axis])
        for axis, dim in enumerate(spatial))

    if channels_last:
        return (input_shape[0],) + new_space + (self.filters,)
    return (input_shape[0], self.filters) + new_space


def get_config(self):
    """Return the layer configuration as a plain dict.

    The entries added here are layered on top of the parent layer's
    config, so they take precedence on a key clash.
    """
    own_config = {
        'rank': self.rank,
        'filters': self.filters,
        'kernel_size': self.kernel_size,
        'strides': self.strides,
        'padding': self.padding,
        'data_format': self.data_format,
        'dilation_rate': self.dilation_rate,
        'activation': activations.serialize(self.activation),
        'use_bias': self.use_bias,
        'kernel_initializer': initializers.serialize(self.kernel_initializer),
        'bias_initializer': initializers.serialize(self.bias_initializer),
        'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
        'bias_regularizer': regularizers.serialize(self.bias_regularizer),
        'activity_regularizer':
            regularizers.serialize(self.activity_regularizer),
        'kernel_constraint': constraints.serialize(self.kernel_constraint),
        'bias_constraint': constraints.serialize(self.bias_constraint)
    }
    merged = dict(super(_Conv, self).get_config().items())
    merged.update(own_config)
    return merged
Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"`, `"causal"` or `"same"` (case-insensitive). `"valid"` means "no padding". `"same"` results in padding the input such that the output has the same length as the original input. `"causal"` results in causal (dilated) convolutions, e.g. `output[t]` does not depend on `input[t + 1:]`. A zero padding is used such that the output has the same length as the original input. Useful when modeling temporal data where the model should not violate the temporal order. See [WaveNet: A Generative Model for Raw Audio, section 2.1] (https://arxiv.org/abs/1609.03499). data_format: A string, one of `"channels_last"` (default) or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, steps, channels)` (default format for temporal data in Keras) while `"channels_first"` corresponds to inputs with shape `(batch, channels, steps)`. dilation_rate: an integer or tuple/list of a single integer, specifying the dilation rate to use for dilated convolution. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any `strides` value != 1. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). 
@interfaces.legacy_conv1d_support
def __init__(self, filters,
             kernel_size,
             strides=1,
             padding='valid',
             data_format='channels_last',
             dilation_rate=1,
             activation=None,
             use_bias=True,
             kernel_initializer='glorot_uniform',
             bias_initializer='zeros',
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             **kwargs):
    """Configure a rank-1 convolution.

    All arguments are forwarded unchanged to `_Conv.__init__` with
    `rank=1`.

    # Raises
        ValueError: if `padding` is `'causal'` and `data_format` is
            not `'channels_last'`.
    """
    # Causal padding is only accepted for the channels-last layout;
    # this is checked before anything is handed to the base class.
    if padding == 'causal':
        if data_format != 'channels_last':
            raise ValueError('When using causal padding in `Conv1D`, '
                             '`data_format` must be "channels_last" '
                             '(temporal data).')
    super(Conv1D, self).__init__(
        rank=1,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        **kwargs)


def get_config(self):
    """Return the base-class config without the `rank` entry."""
    config = super(Conv1D, self).get_config()
    # `rank` is fixed to 1 by this class and is not one of its own
    # constructor arguments, so it is dropped from the config.
    config.pop('rank')
    return config
Finally, if `activation` is not `None`, it is applied to the outputs as well. When using this layer as the first layer in a model, provide the keyword argument `input_shape` (tuple of integers, does not include the sample axis), e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures in `data_format="channels_last"`. # Arguments filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window. Can be a single integer to specify the same value for all spatial dimensions. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the height and width. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). Note that `"same"` is slightly inconsistent across backends with `strides` != 1, as described [here](https://github.com/keras-team/keras/pull/9473#issuecomment-372166860) data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, height, width, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, height, width)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". dilation_rate: an integer or tuple/list of 2 integers, specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any stride value != 1. activation: Activation function to use (see [activations](../activations.md)). 
If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the kernel matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). # Input shape 4D tensor with shape: `(batch, channels, rows, cols)` if `data_format` is `"channels_first"` or 4D tensor with shape: `(batch, rows, cols, channels)` if `data_format` is `"channels_last"`. # Output shape 4D tensor with shape: `(batch, filters, new_rows, new_cols)` if `data_format` is `"channels_first"` or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if `data_format` is `"channels_last"`. `rows` and `cols` values might have changed due to padding. 
@interfaces.legacy_conv2d_support
def __init__(self, filters,
             kernel_size,
             strides=(1, 1),
             padding='valid',
             data_format=None,
             dilation_rate=(1, 1),
             activation=None,
             use_bias=True,
             kernel_initializer='glorot_uniform',
             bias_initializer='zeros',
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             **kwargs):
    """Configure a rank-2 convolution.

    All arguments are forwarded unchanged to `_Conv.__init__` with
    `rank=2`; no additional validation is done here.
    """
    super(Conv2D, self).__init__(
        rank=2,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        **kwargs)


def get_config(self):
    """Return the base-class config without the `rank` entry."""
    config = super(Conv2D, self).get_config()
    # `rank` is fixed to 2 by this class and is not one of its own
    # constructor arguments, so it is dropped from the config.
    config.pop('rank')
    return config
strides: An integer or tuple/list of 3 integers, specifying the strides of the convolution along each spatial dimension. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". dilation_rate: an integer or tuple/list of 3 integers, specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any stride value != 1. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). 
@interfaces.legacy_conv3d_support
def __init__(self, filters,
             kernel_size,
             strides=(1, 1, 1),
             padding='valid',
             data_format=None,
             dilation_rate=(1, 1, 1),
             activation=None,
             use_bias=True,
             kernel_initializer='glorot_uniform',
             bias_initializer='zeros',
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             **kwargs):
    """Configure a rank-3 convolution.

    All arguments are forwarded unchanged to `_Conv.__init__` with
    `rank=3`; no additional validation is done here.
    """
    super(Conv3D, self).__init__(
        rank=3,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        **kwargs)


def get_config(self):
    """Return the base-class config without the `rank` entry."""
    config = super(Conv3D, self).get_config()
    # `rank` is fixed to 3 by this class and is not one of its own
    # constructor arguments, so it is dropped from the config.
    config.pop('rank')
    return config
The need for transposed convolutions generally arises from the desire to use a transformation going in the opposite direction of a normal convolution, i.e., from something that has the shape of the output of some convolution to something that has the shape of its input while maintaining a connectivity pattern that is compatible with said convolution. When using this layer as the first layer in a model, provide the keyword argument `input_shape` (tuple of integers, does not include the sample axis), e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures in `data_format="channels_last"`. # Arguments filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window. Can be a single integer to specify the same value for all spatial dimensions. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the height and width. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). output_padding: An integer or tuple/list of 2 integers, specifying the amount of padding along the height and width of the output tensor. Can be a single integer to specify the same value for all spatial dimensions. The amount of output padding along a given dimension must be lower than the stride along that same dimension. If set to `None` (default), the output shape is inferred. data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, height, width, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, height, width)`. 
It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". dilation_rate: an integer or tuple/list of 2 integers, specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any stride value != 1. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the kernel matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). # Input shape 4D tensor with shape: `(batch, channels, rows, cols)` if `data_format` is `"channels_first"` or 4D tensor with shape: `(batch, rows, cols, channels)` if `data_format` is `"channels_last"`. # Output shape 4D tensor with shape: `(batch, filters, new_rows, new_cols)` if `data_format` is `"channels_first"` or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if `data_format` is `"channels_last"`. `rows` and `cols` values might have changed due to padding. 
@interfaces.legacy_deconv2d_support
def __init__(self, filters,
             kernel_size,
             strides=(1, 1),
             padding='valid',
             output_padding=None,
             data_format=None,
             dilation_rate=(1, 1),
             activation=None,
             use_bias=True,
             kernel_initializer='glorot_uniform',
             bias_initializer='zeros',
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             **kwargs):
    """Configure the transposed 2D convolution.

    Everything except `output_padding` is forwarded to
    `Conv2D.__init__`. `output_padding`, when given, is normalized to
    a 2-tuple and validated against the (already normalized) strides.

    # Raises
        ValueError: if any `output_padding` entry is greater than or
            equal to the stride along the same dimension.
    """
    super(Conv2DTranspose, self).__init__(
        filters,
        kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        **kwargs)

    self.output_padding = output_padding
    if self.output_padding is not None:
        self.output_padding = conv_utils.normalize_tuple(
            self.output_padding, 2, 'output_padding')
        # Compared against `self.strides`, which the parent constructor
        # has already normalized to a tuple.
        for stride, out_pad in zip(self.strides, self.output_padding):
            if out_pad >= stride:
                raise ValueError('Stride ' + str(self.strides) + ' must be '
                                 'greater than output padding ' +
                                 str(self.output_padding))


def build(self, input_shape):
    """Create the kernel (and optional bias) weights.

    # Arguments
        input_shape: Shape tuple of the layer input; must have 4
            entries, and the entry on the channel axis must not be
            `None`.

    # Raises
        ValueError: if `input_shape` does not have 4 entries, or if
            its channel dimension is `None`.
    """
    if len(input_shape) != 4:
        # Build one message string. Passing the shape as a second
        # positional argument would make the exception carry (and
        # print) a 2-tuple instead of a readable sentence.
        raise ValueError('Inputs should have rank 4; '
                         'Received input shape: ' + str(input_shape))
    if self.data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1
    if input_shape[channel_axis] is None:
        raise ValueError('The channel dimension of the inputs '
                         'should be defined. Found `None`.')
    input_dim = input_shape[channel_axis]
    # The last two kernel axes are (filters, input_dim), the reverse
    # of the (input_dim, filters) order used by the forward `_Conv`.
    kernel_shape = self.kernel_size + (self.filters, input_dim)

    self.kernel = self.add_weight(shape=kernel_shape,
                                  initializer=self.kernel_initializer,
                                  name='kernel',
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
    if self.use_bias:
        self.bias = self.add_weight(shape=(self.filters,),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.bias = None
    # Set input spec.
    self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
    self.built = True


def call(self, inputs):
    """Apply the transposed convolution, then bias and activation.

    The target output shape is assembled from `K.shape(inputs)`, with
    height and width mapped through `conv_utils.deconv_length`.
    """
    input_shape = K.shape(inputs)
    batch_size = input_shape[0]
    if self.data_format == 'channels_first':
        h_axis, w_axis = 2, 3
    else:
        h_axis, w_axis = 1, 2

    height, width = input_shape[h_axis], input_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides
    if self.output_padding is None:
        out_pad_h = out_pad_w = None
    else:
        out_pad_h, out_pad_w = self.output_padding

    # Infer the dynamic output shape:
    out_height = conv_utils.deconv_length(height,
                                          stride_h, kernel_h,
                                          self.padding,
                                          out_pad_h,
                                          self.dilation_rate[0])
    out_width = conv_utils.deconv_length(width,
                                         stride_w, kernel_w,
                                         self.padding,
                                         out_pad_w,
                                         self.dilation_rate[1])
    if self.data_format == 'channels_first':
        output_shape = (batch_size, self.filters, out_height, out_width)
    else:
        output_shape = (batch_size, out_height, out_width, self.filters)

    outputs = K.conv2d_transpose(
        inputs,
        self.kernel,
        output_shape,
        self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    if self.use_bias:
        outputs = K.bias_add(
            outputs,
            self.bias,
            data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs


def compute_output_shape(self, input_shape):
    """Return the output shape tuple for a given `input_shape`.

    The channel entry becomes `self.filters`; the height and width
    entries are mapped through `conv_utils.deconv_length`, using the
    same arguments as `call`. The batch entry is copied unchanged.
    """
    output_shape = list(input_shape)
    if self.data_format == 'channels_first':
        c_axis, h_axis, w_axis = 1, 2, 3
    else:
        c_axis, h_axis, w_axis = 3, 1, 2

    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides
    if self.output_padding is None:
        out_pad_h = out_pad_w = None
    else:
        out_pad_h, out_pad_w = self.output_padding

    output_shape[c_axis] = self.filters
    output_shape[h_axis] = conv_utils.deconv_length(output_shape[h_axis],
                                                    stride_h,
                                                    kernel_h,
                                                    self.padding,
                                                    out_pad_h,
                                                    self.dilation_rate[0])
    output_shape[w_axis] = conv_utils.deconv_length(output_shape[w_axis],
                                                    stride_w,
                                                    kernel_w,
                                                    self.padding,
                                                    out_pad_w,
                                                    self.dilation_rate[1])
    return tuple(output_shape)


def get_config(self):
    """Return the `Conv2D` config extended with `output_padding`."""
    config = super(Conv2DTranspose, self).get_config()
    config['output_padding'] = self.output_padding
    return config
padding: one of `"valid"` or `"same"` (case-insensitive). output_padding: An integer or tuple/list of 3 integers, specifying the amount of padding along the depth, height, and width. Can be a single integer to specify the same value for all spatial dimensions. The amount of output padding along a given dimension must be lower than the stride along that same dimension. If set to `None` (default), the output shape is inferred. data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, depth, height, width, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, depth, height, width)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". dilation_rate: an integer or tuple/list of 3 integers, specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any stride value != 1. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). 
kernel_constraint: Constraint function applied to the kernel matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). # Input shape 5D tensor with shape: `(batch, channels, depth, rows, cols)` if `data_format` is `"channels_first"` or 5D tensor with shape: `(batch, depth, rows, cols, channels)` if `data_format` is `"channels_last"`. # Output shape 5D tensor with shape: `(batch, filters, new_depth, new_rows, new_cols)` if `data_format` is `"channels_first"` or 5D tensor with shape: `(batch, new_depth, new_rows, new_cols, filters)` if `data_format` is `"channels_last"`. `depth` and `rows` and `cols` values might have changed due to padding. If `output_padding` is specified:: ``` new_depth = ((depth - 1) * strides[0] + kernel_size[0] - 2 * padding[0] + output_padding[0]) new_rows = ((rows - 1) * strides[1] + kernel_size[1] - 2 * padding[1] + output_padding[1]) new_cols = ((cols - 1) * strides[2] + kernel_size[2] - 2 * padding[2] + output_padding[2]) ``` # References - [A guide to convolution arithmetic for deep learning] (https://arxiv.org/abs/1603.07285v1) - [Deconvolutional Networks] (http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf) """ def __init__(self, filters, kernel_size, strides=(1, 1, 1), padding='valid', output_padding=None, data_format=None, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs): super(Conv3DTranspose, self).__init__( filters, kernel_size, strides=strides, padding=padding, data_format=data_format, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, 
bias_constraint=bias_constraint, **kwargs) self.output_padding = output_padding if self.output_padding is not None: self.output_padding = conv_utils.normalize_tuple( self.output_padding, 3, 'output_padding') for stride, out_pad in zip(self.strides, self.output_padding): if out_pad >= stride: raise ValueError('Stride ' + str(self.strides) + ' must be ' 'greater than output padding ' + str(self.output_padding)) def build(self, input_shape): if len(input_shape) != 5: raise ValueError('Inputs should have rank ' + str(5) + '; Received input shape:', str(input_shape)) if self.data_format == 'channels_first': channel_axis = 1 else: channel_axis = -1 if input_shape[channel_axis] is None: raise ValueError('The channel dimension of the inputs ' 'should be defined. Found `None`.') input_dim = input_shape[channel_axis] kernel_shape = self.kernel_size + (self.filters, input_dim) self.kernel = self.add_weight(shape=kernel_shape, initializer=self.kernel_initializer, name='kernel', regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) if self.use_bias: self.bias = self.add_weight(shape=(self.filters,), initializer=self.bias_initializer, name='bias', regularizer=self.bias_regularizer, constraint=self.bias_constraint) else: self.bias = None # Set input spec. 
self.input_spec = InputSpec(ndim=5, axes={channel_axis: input_dim}) self.built = True def call(self, inputs): input_shape = K.shape(inputs) batch_size = input_shape[0] if self.data_format == 'channels_first': d_axis, h_axis, w_axis = 2, 3, 4 else: d_axis, h_axis, w_axis = 1, 2, 3 depth = input_shape[d_axis] height = input_shape[h_axis] width = input_shape[w_axis] kernel_d, kernel_h, kernel_w = self.kernel_size stride_d, stride_h, stride_w = self.strides if self.output_padding is None: out_pad_d = out_pad_h = out_pad_w = None else: out_pad_d, out_pad_h, out_pad_w = self.output_padding # Infer the dynamic output shape: out_depth = conv_utils.deconv_length(depth, stride_d, kernel_d, self.padding, out_pad_d) out_height = conv_utils.deconv_length(height, stride_h, kernel_h, self.padding, out_pad_h) out_width = conv_utils.deconv_length(width, stride_w, kernel_w, self.padding, out_pad_w) if self.data_format == 'channels_first': output_shape = (batch_size, self.filters, out_depth, out_height, out_width) else: output_shape = (batch_size, out_depth, out_height, out_width, self.filters) outputs = K.conv3d_transpose(inputs, self.kernel, output_shape, self.strides, padding=self.padding, data_format=self.data_format) if self.use_bias: outputs = K.bias_add( outputs, self.bias, data_format=self.data_format) if self.activation is not None: return self.activation(outputs) return outputs def compute_output_shape(self, input_shape): output_shape = list(input_shape) if self.data_format == 'channels_first': c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4 else: c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3 kernel_d, kernel_h, kernel_w = self.kernel_size stride_d, stride_h, stride_w = self.strides if self.output_padding is None: out_pad_d = out_pad_h = out_pad_w = None else: out_pad_d, out_pad_h, out_pad_w = self.output_padding output_shape[c_axis] = self.filters output_shape[d_axis] = conv_utils.deconv_length(output_shape[d_axis], stride_d, kernel_d, self.padding, out_pad_d) 
output_shape[h_axis] = conv_utils.deconv_length(output_shape[h_axis], stride_h, kernel_h, self.padding, out_pad_h) output_shape[w_axis] = conv_utils.deconv_length(output_shape[w_axis], stride_w, kernel_w, self.padding, out_pad_w) return tuple(output_shape) def get_config(self): config = super(Conv3DTranspose, self).get_config() config.pop('dilation_rate') config['output_padding'] = self.output_padding return config class _SeparableConv(_Conv): """Abstract nD depthwise separable convolution layer (private). Separable convolutions consist in first performing a depthwise spatial convolution (which acts on each input channel separately) followed by a pointwise convolution which mixes together the resulting output channels. The `depth_multiplier` argument controls how many output channels are generated per input channel in the depthwise step. Intuitively, separable convolutions can be understood as a way to factorize a convolution kernel into two smaller kernels, or as an extreme version of an Inception block. # Arguments rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution. filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window. Can be a single integer to specify the same value for all spatial dimensions. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the height and width. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. 
`"channels_last"` corresponds to inputs with shape `(batch, height, width, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, height, width)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". dilation_rate: an integer or tuple/list of n integers, specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any stride value != 1. depth_multiplier: The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to `filters_in * depth_multiplier`. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. depthwise_initializer: Initializer for the depthwise kernel matrix (see [initializers](../initializers.md)). pointwise_initializer: Initializer for the pointwise kernel matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). depthwise_regularizer: Regularizer function applied to the depthwise kernel matrix (see [regularizer](../regularizers.md)). pointwise_regularizer: Regularizer function applied to the pointwise kernel matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). depthwise_constraint: Constraint function applied to the depthwise kernel matrix (see [constraints](../constraints.md)). 
pointwise_constraint: Constraint function applied to the pointwise kernel matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). # Input shape 4D tensor with shape: `(batch, channels, rows, cols)` if `data_format` is `"channels_first"` or 4D tensor with shape: `(batch, rows, cols, channels)` if `data_format` is `"channels_last"`. # Output shape 4D tensor with shape: `(batch, filters, new_rows, new_cols)` if `data_format` is `"channels_first"` or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if `data_format` is `"channels_last"`. `rows` and `cols` values might have changed due to padding. """ def __init__(self, rank, filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, depth_multiplier=1, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', pointwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, pointwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, pointwise_constraint=None, bias_constraint=None, **kwargs): super(_SeparableConv, self).__init__( rank=rank, filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, bias_initializer=bias_initializer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, bias_constraint=bias_constraint, **kwargs) self.depth_multiplier = depth_multiplier self.depthwise_initializer = initializers.get(depthwise_initializer) self.pointwise_initializer = initializers.get(pointwise_initializer) self.depthwise_regularizer = regularizers.get(depthwise_regularizer) self.pointwise_regularizer = regularizers.get(pointwise_regularizer) self.depthwise_constraint = constraints.get(depthwise_constraint) self.pointwise_constraint = constraints.get(pointwise_constraint) 
def build(self, input_shape): if len(input_shape) < self.rank + 2: raise ValueError('Inputs to `SeparableConv' + str(self.rank) + 'D` ' 'should have rank ' + str(self.rank + 2) + '. ' 'Received input shape:', str(input_shape)) channel_axis = 1 if self.data_format == 'channels_first' else -1 if input_shape[channel_axis] is None: raise ValueError('The channel dimension of the inputs ' 'should be defined. Found `None`.') input_dim = int(input_shape[channel_axis]) depthwise_kernel_shape = (input_dim, self.depth_multiplier) depthwise_kernel_shape = self.kernel_size + depthwise_kernel_shape pointwise_kernel_shape = (self.depth_multiplier * input_dim, self.filters) pointwise_kernel_shape = (1,) * self.rank + pointwise_kernel_shape self.depthwise_kernel = self.add_weight( shape=depthwise_kernel_shape, initializer=self.depthwise_initializer, name='depthwise_kernel', regularizer=self.depthwise_regularizer, constraint=self.depthwise_constraint) self.pointwise_kernel = self.add_weight( shape=pointwise_kernel_shape, initializer=self.pointwise_initializer, name='pointwise_kernel', regularizer=self.pointwise_regularizer, constraint=self.pointwise_constraint) if self.use_bias: self.bias = self.add_weight(shape=(self.filters,), initializer=self.bias_initializer, name='bias', regularizer=self.bias_regularizer, constraint=self.bias_constraint) else: self.bias = None # Set input spec. 
self.input_spec = InputSpec(ndim=self.rank + 2, axes={channel_axis: input_dim}) self.built = True def call(self, inputs): if self.rank == 1: outputs = K.separable_conv1d( inputs, self.depthwise_kernel, self.pointwise_kernel, data_format=self.data_format, strides=self.strides, padding=self.padding, dilation_rate=self.dilation_rate) if self.rank == 2: outputs = K.separable_conv2d( inputs, self.depthwise_kernel, self.pointwise_kernel, data_format=self.data_format, strides=self.strides, padding=self.padding, dilation_rate=self.dilation_rate) if self.use_bias: outputs = K.bias_add( outputs, self.bias, data_format=self.data_format) if self.activation is not None: return self.activation(outputs) return outputs def get_config(self): config = super(_SeparableConv, self).get_config() config.pop('rank') config.pop('kernel_initializer') config.pop('kernel_regularizer') config.pop('kernel_constraint') config['depth_multiplier'] = self.depth_multiplier config['depthwise_initializer'] = ( initializers.serialize(self.depthwise_initializer)) config['pointwise_initializer'] = ( initializers.serialize(self.pointwise_initializer)) config['depthwise_regularizer'] = ( regularizers.serialize(self.depthwise_regularizer)) config['pointwise_regularizer'] = ( regularizers.serialize(self.pointwise_regularizer)) config['depthwise_constraint'] = ( constraints.serialize(self.depthwise_constraint)) config['pointwise_constraint'] = ( constraints.serialize(self.pointwise_constraint)) return config class SeparableConv1D(_SeparableConv): """Depthwise separable 1D convolution. Separable convolutions consist in first performing a depthwise spatial convolution (which acts on each input channel separately) followed by a pointwise convolution which mixes together the resulting output channels. The `depth_multiplier` argument controls how many output channels are generated per input channel in the depthwise step. 
Intuitively, separable convolutions can be understood as a way to factorize a convolution kernel into two smaller kernels, or as an extreme version of an Inception block. # Arguments filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of single integer, specifying the length of the 1D convolution window. strides: An integer or tuple/list of single integer, specifying the stride length of the convolution. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, steps, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, steps)`. dilation_rate: An integer or tuple/list of a single integer, specifying the dilation rate to use for dilated convolution. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any `strides` value != 1. depth_multiplier: The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to `filters_in * depth_multiplier`. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. depthwise_initializer: Initializer for the depthwise kernel matrix (see [initializers](../initializers.md)). pointwise_initializer: Initializer for the pointwise kernel matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). 
depthwise_regularizer: Regularizer function applied to the depthwise kernel matrix (see [regularizer](../regularizers.md)). pointwise_regularizer: Regularizer function applied to the pointwise kernel matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). depthwise_constraint: Constraint function applied to the depthwise kernel matrix (see [constraints](../constraints.md)). pointwise_constraint: Constraint function applied to the pointwise kernel matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). # Input shape 3D tensor with shape: `(batch, channels, steps)` if `data_format` is `"channels_first"` or 3D tensor with shape: `(batch, steps, channels)` if `data_format` is `"channels_last"`. # Output shape 3D tensor with shape: `(batch, filters, new_steps)` if `data_format` is `"channels_first"` or 3D tensor with shape: `(batch, new_steps, filters)` if `data_format` is `"channels_last"`. `new_steps` values might have changed due to padding or strides. 
""" def __init__(self, filters, kernel_size, strides=1, padding='valid', data_format='channels_last', dilation_rate=1, depth_multiplier=1, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', pointwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, pointwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, pointwise_constraint=None, bias_constraint=None, **kwargs): super(SeparableConv1D, self).__init__( rank=1, filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, depth_multiplier=depth_multiplier, activation=activation, use_bias=use_bias, depthwise_initializer=depthwise_initializer, pointwise_initializer=pointwise_initializer, bias_initializer=bias_initializer, depthwise_regularizer=depthwise_regularizer, pointwise_regularizer=pointwise_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, depthwise_constraint=depthwise_constraint, pointwise_constraint=pointwise_constraint, bias_constraint=bias_constraint, **kwargs) class SeparableConv2D(_SeparableConv): """Depthwise separable 2D convolution. Separable convolutions consist in first performing a depthwise spatial convolution (which acts on each input channel separately) followed by a pointwise convolution which mixes together the resulting output channels. The `depth_multiplier` argument controls how many output channels are generated per input channel in the depthwise step. Intuitively, separable convolutions can be understood as a way to factorize a convolution kernel into two smaller kernels, or as an extreme version of an Inception block. # Arguments filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window. 
Can be a single integer to specify the same value for all spatial dimensions. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the height and width. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, height, width, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, height, width)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". dilation_rate: An integer or tuple/list of 2 integers, specifying the dilation rate to use for dilated convolution. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any `strides` value != 1. depth_multiplier: The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to `filters_in * depth_multiplier`. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. depthwise_initializer: Initializer for the depthwise kernel matrix (see [initializers](../initializers.md)). pointwise_initializer: Initializer for the pointwise kernel matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). depthwise_regularizer: Regularizer function applied to the depthwise kernel matrix (see [regularizer](../regularizers.md)). 
pointwise_regularizer: Regularizer function applied to the pointwise kernel matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). depthwise_constraint: Constraint function applied to the depthwise kernel matrix (see [constraints](../constraints.md)). pointwise_constraint: Constraint function applied to the pointwise kernel matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). # Input shape 4D tensor with shape: `(batch, channels, rows, cols)` if `data_format` is `"channels_first"` or 4D tensor with shape: `(batch, rows, cols, channels)` if `data_format` is `"channels_last"`. # Output shape 4D tensor with shape: `(batch, filters, new_rows, new_cols)` if `data_format` is `"channels_first"` or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if `data_format` is `"channels_last"`. `rows` and `cols` values might have changed due to padding. 
""" @interfaces.legacy_separable_conv2d_support def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), depth_multiplier=1, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', pointwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, pointwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, pointwise_constraint=None, bias_constraint=None, **kwargs): super(SeparableConv2D, self).__init__( rank=2, filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, depth_multiplier=depth_multiplier, activation=activation, use_bias=use_bias, depthwise_initializer=depthwise_initializer, pointwise_initializer=pointwise_initializer, bias_initializer=bias_initializer, depthwise_regularizer=depthwise_regularizer, pointwise_regularizer=pointwise_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, depthwise_constraint=depthwise_constraint, pointwise_constraint=pointwise_constraint, bias_constraint=bias_constraint, **kwargs) class DepthwiseConv2D(Conv2D): """Depthwise separable 2D convolution. Depthwise Separable convolutions consists in performing just the first step in a depthwise spatial convolution (which acts on each input channel separately). The `depth_multiplier` argument controls how many output channels are generated per input channel in the depthwise step. # Arguments kernel_size: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window. Can be a single integer to specify the same value for all spatial dimensions. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the height and width. Can be a single integer to specify the same value for all spatial dimensions. 
Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). depth_multiplier: The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to `filters_in * depth_multiplier`. data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, height, width, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, height, width)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be 'channels_last'. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. 'linear' activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. depthwise_initializer: Initializer for the depthwise kernel matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). depthwise_regularizer: Regularizer function applied to the depthwise kernel matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its 'activation'). (see [regularizer](../regularizers.md)). depthwise_constraint: Constraint function applied to the depthwise kernel matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). 
# Input shape 4D tensor with shape: `(batch, channels, rows, cols)` if `data_format` is `"channels_first"` or 4D tensor with shape: `(batch, rows, cols, channels)` if `data_format` is `"channels_last"`. # Output shape 4D tensor with shape: `(batch, filters, new_rows, new_cols)` if `data_format` is `"channels_first"` or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if `data_format` is `"channels_last"`. `rows` and `cols` values might have changed due to padding. """ def __init__(self, kernel_size, strides=(1, 1), padding='valid', depth_multiplier=1, data_format=None, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, bias_constraint=None, **kwargs): super(DepthwiseConv2D, self).__init__( filters=None, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, activation=activation, use_bias=use_bias, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, bias_constraint=bias_constraint, **kwargs) self.depth_multiplier = depth_multiplier self.depthwise_initializer = initializers.get(depthwise_initializer) self.depthwise_regularizer = regularizers.get(depthwise_regularizer) self.depthwise_constraint = constraints.get(depthwise_constraint) self.bias_initializer = initializers.get(bias_initializer) def build(self, input_shape): if len(input_shape) < 4: raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. ' 'Received input shape:', str(input_shape)) if self.data_format == 'channels_first': channel_axis = 1 else: channel_axis = 3 if input_shape[channel_axis] is None: raise ValueError('The channel dimension of the inputs to ' '`DepthwiseConv2D` ' 'should be defined. 
Found `None`.') input_dim = int(input_shape[channel_axis]) depthwise_kernel_shape = (self.kernel_size[0], self.kernel_size[1], input_dim, self.depth_multiplier) self.depthwise_kernel = self.add_weight( shape=depthwise_kernel_shape, initializer=self.depthwise_initializer, name='depthwise_kernel', regularizer=self.depthwise_regularizer, constraint=self.depthwise_constraint) if self.use_bias: self.bias = self.add_weight(shape=(input_dim * self.depth_multiplier,), initializer=self.bias_initializer, name='bias', regularizer=self.bias_regularizer, constraint=self.bias_constraint) else: self.bias = None # Set input spec. self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) self.built = True def call(self, inputs, training=None): outputs = K.depthwise_conv2d( inputs, self.depthwise_kernel, strides=self.strides, padding=self.padding, dilation_rate=self.dilation_rate, data_format=self.data_format) if self.use_bias: outputs = K.bias_add( outputs, self.bias, data_format=self.data_format) if self.activation is not None: return self.activation(outputs) return outputs def compute_output_shape(self, input_shape): if self.data_format == 'channels_first': rows = input_shape[2] cols = input_shape[3] out_filters = input_shape[1] * self.depth_multiplier elif self.data_format == 'channels_last': rows = input_shape[1] cols = input_shape[2] out_filters = input_shape[3] * self.depth_multiplier rows = conv_utils.conv_output_length(rows, self.kernel_size[0], self.padding, self.strides[0]) cols = conv_utils.conv_output_length(cols, self.kernel_size[1], self.padding, self.strides[1]) if self.data_format == 'channels_first': return (input_shape[0], out_filters, rows, cols) elif self.data_format == 'channels_last': return (input_shape[0], rows, cols, out_filters) def get_config(self): config = super(DepthwiseConv2D, self).get_config() config.pop('filters') config.pop('kernel_initializer') config.pop('kernel_regularizer') config.pop('kernel_constraint') config['depth_multiplier'] = 
self.depth_multiplier config['depthwise_initializer'] = ( initializers.serialize(self.depthwise_initializer)) config['depthwise_regularizer'] = ( regularizers.serialize(self.depthwise_regularizer)) config['depthwise_constraint'] = ( constraints.serialize(self.depthwise_constraint)) return config class _UpSampling(Layer): """Abstract nD UpSampling layer (private, used as implementation base). # Arguments size: Tuple of ints. data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, ..., channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, ...)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". """ def __init__(self, size, data_format=None, **kwargs): # self.rank is 1 for UpSampling1D, 2 for UpSampling2D. self.rank = len(size) self.size = size self.data_format = K.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=self.rank + 2) super(_UpSampling, self).__init__(**kwargs) def call(self, inputs): raise NotImplementedError def compute_output_shape(self, input_shape): size_all_dims = (1,) + self.size + (1,) spatial_axes = list(range(1, 1 + self.rank)) size_all_dims = transpose_shape(size_all_dims, self.data_format, spatial_axes) output_shape = list(input_shape) for dim in range(len(output_shape)): if output_shape[dim] is not None: output_shape[dim] *= size_all_dims[dim] return tuple(output_shape) def get_config(self): config = {'size': self.size, 'data_format': self.data_format} base_config = super(_UpSampling, self).get_config() return dict(list(base_config.items()) + list(config.items())) class UpSampling1D(_UpSampling): """Upsampling layer for 1D inputs. Repeats each temporal step `size` times along the time axis. # Arguments size: integer. Upsampling factor. 
# Input shape 3D tensor with shape: `(batch, steps, features)`. # Output shape 3D tensor with shape: `(batch, upsampled_steps, features)`. """ @interfaces.legacy_upsampling1d_support def __init__(self, size=2, **kwargs): super(UpSampling1D, self).__init__((int(size),), 'channels_last', **kwargs) def call(self, inputs): output = K.repeat_elements(inputs, self.size[0], axis=1) return output def get_config(self): config = super(UpSampling1D, self).get_config() config['size'] = self.size[0] config.pop('data_format') return config class UpSampling2D(_UpSampling): """Upsampling layer for 2D inputs. Repeats the rows and columns of the data by size[0] and size[1] respectively. # Arguments size: int, or tuple of 2 integers. The upsampling factors for rows and columns. data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, height, width, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, height, width)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". interpolation: A string, one of `nearest` or `bilinear`. Note that CNTK does not support yet the `bilinear` upscaling and that with Theano, only `size=(2, 2)` is possible. 
# Input shape 4D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, rows, cols, channels)` - If `data_format` is `"channels_first"`: `(batch, channels, rows, cols)` # Output shape 4D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, upsampled_rows, upsampled_cols, channels)` - If `data_format` is `"channels_first"`: `(batch, channels, upsampled_rows, upsampled_cols)` """ @interfaces.legacy_upsampling2d_support def __init__(self, size=(2, 2), data_format=None, interpolation='nearest', **kwargs): normalized_size = conv_utils.normalize_tuple(size, 2, 'size') super(UpSampling2D, self).__init__(normalized_size, data_format, **kwargs) if interpolation not in ['nearest', 'bilinear']: raise ValueError('interpolation should be one ' 'of "nearest" or "bilinear".') self.interpolation = interpolation def call(self, inputs): return K.resize_images(inputs, self.size[0], self.size[1], self.data_format, self.interpolation) def get_config(self): config = super(UpSampling2D, self).get_config() config['interpolation'] = self.interpolation return config class UpSampling3D(_UpSampling): """Upsampling layer for 3D inputs. Repeats the 1st, 2nd and 3rd dimensions of the data by size[0], size[1] and size[2] respectively. # Arguments size: int, or tuple of 3 integers. The upsampling factors for dim1, dim2 and dim3. data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". 
# Input shape 5D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, dim1, dim2, dim3, channels)` - If `data_format` is `"channels_first"`: `(batch, channels, dim1, dim2, dim3)` # Output shape 5D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, upsampled_dim1, upsampled_dim2, upsampled_dim3, channels)` - If `data_format` is `"channels_first"`: `(batch, channels, upsampled_dim1, upsampled_dim2, upsampled_dim3)` """ @interfaces.legacy_upsampling3d_support def __init__(self, size=(2, 2, 2), data_format=None, **kwargs): normalized_size = conv_utils.normalize_tuple(size, 3, 'size') super(UpSampling3D, self).__init__(normalized_size, data_format, **kwargs) def call(self, inputs): return K.resize_volumes(inputs, self.size[0], self.size[1], self.size[2], self.data_format) class _ZeroPadding(Layer): """Abstract nD ZeroPadding layer (private, used as implementation base). # Arguments padding: Tuple of tuples of two ints. Can be a tuple of ints when rank is 1. data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, ..., channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, ...)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". """ def __init__(self, padding, data_format=None, **kwargs): # self.rank is 1 for ZeroPadding1D, 2 for ZeroPadding2D. 
self.rank = len(padding) self.padding = padding self.data_format = K.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=self.rank + 2) super(_ZeroPadding, self).__init__(**kwargs) def call(self, inputs): raise NotImplementedError def compute_output_shape(self, input_shape): padding_all_dims = ((0, 0),) + self.padding + ((0, 0),) spatial_axes = list(range(1, 1 + self.rank)) padding_all_dims = transpose_shape(padding_all_dims, self.data_format, spatial_axes) output_shape = list(input_shape) for dim in range(len(output_shape)): if output_shape[dim] is not None: output_shape[dim] += sum(padding_all_dims[dim]) return tuple(output_shape) def get_config(self): config = {'padding': self.padding, 'data_format': self.data_format} base_config = super(_ZeroPadding, self).get_config() return dict(list(base_config.items()) + list(config.items())) class ZeroPadding1D(_ZeroPadding): """Zero-padding layer for 1D input (e.g. temporal sequence). # Arguments padding: int, or tuple of int (length 2), or dictionary. - If int: How many zeros to add at the beginning and end of the padding dimension (axis 1). - If tuple of int (length 2): How many zeros to add at the beginning and at the end of the padding dimension (`(left_pad, right_pad)`). # Input shape 3D tensor with shape `(batch, axis_to_pad, features)` # Output shape 3D tensor with shape `(batch, padded_axis, features)` """ def __init__(self, padding=1, **kwargs): normalized_padding = (conv_utils.normalize_tuple(padding, 2, 'padding'),) super(ZeroPadding1D, self).__init__(normalized_padding, 'channels_last', **kwargs) def call(self, inputs): return K.temporal_padding(inputs, padding=self.padding[0]) def get_config(self): config = super(ZeroPadding1D, self).get_config() config['padding'] = config['padding'][0] config.pop('data_format') return config class ZeroPadding2D(_ZeroPadding): """Zero-padding layer for 2D input (e.g. picture). 
This layer can add rows and columns of zeros at the top, bottom, left and right side of an image tensor. # Arguments padding: int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints. - If int: the same symmetric padding is applied to height and width. - If tuple of 2 ints: interpreted as two different symmetric padding values for height and width: `(symmetric_height_pad, symmetric_width_pad)`. - If tuple of 2 tuples of 2 ints: interpreted as `((top_pad, bottom_pad), (left_pad, right_pad))` data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, height, width, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, height, width)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". # Input shape 4D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, rows, cols, channels)` - If `data_format` is `"channels_first"`: `(batch, channels, rows, cols)` # Output shape 4D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, padded_rows, padded_cols, channels)` - If `data_format` is `"channels_first"`: `(batch, channels, padded_rows, padded_cols)` """ @interfaces.legacy_zeropadding2d_support def __init__(self, padding=(1, 1), data_format=None, **kwargs): if isinstance(padding, int): normalized_padding = ((padding, padding), (padding, padding)) elif hasattr(padding, '__len__'): if len(padding) != 2: raise ValueError('`padding` should have two elements. 
' 'Found: ' + str(padding)) height_padding = conv_utils.normalize_tuple(padding[0], 2, '1st entry of padding') width_padding = conv_utils.normalize_tuple(padding[1], 2, '2nd entry of padding') normalized_padding = (height_padding, width_padding) else: raise ValueError('`padding` should be either an int, ' 'a tuple of 2 ints ' '(symmetric_height_pad, symmetric_width_pad), ' 'or a tuple of 2 tuples of 2 ints ' '((top_pad, bottom_pad), (left_pad, right_pad)). ' 'Found: ' + str(padding)) super(ZeroPadding2D, self).__init__(normalized_padding, data_format, **kwargs) def call(self, inputs): return K.spatial_2d_padding(inputs, padding=self.padding, data_format=self.data_format) class ZeroPadding3D(_ZeroPadding): """Zero-padding layer for 3D data (spatial or spatio-temporal). # Arguments padding: int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints. - If int: the same symmetric padding is applied to height and width. - If tuple of 3 ints: interpreted as two different symmetric padding values for height and width: `(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad)`. - If tuple of 3 tuples of 2 ints: interpreted as `((left_dim1_pad, right_dim1_pad), (left_dim2_pad, right_dim2_pad), (left_dim3_pad, right_dim3_pad))` data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". 
# Input shape 5D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, first_axis_to_pad, second_axis_to_pad, third_axis_to_pad, depth)` - If `data_format` is `"channels_first"`: `(batch, depth, first_axis_to_pad, second_axis_to_pad, third_axis_to_pad)` # Output shape 5D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, first_padded_axis, second_padded_axis, third_axis_to_pad, depth)` - If `data_format` is `"channels_first"`: `(batch, depth, first_padded_axis, second_padded_axis, third_axis_to_pad)` """ @interfaces.legacy_zeropadding3d_support def __init__(self, padding=(1, 1, 1), data_format=None, **kwargs): if isinstance(padding, int): normalized_padding = 3 * ((padding, padding),) elif hasattr(padding, '__len__'): if len(padding) != 3: raise ValueError('`padding` should have 3 elements. ' 'Found: ' + str(padding)) dim1_padding = conv_utils.normalize_tuple(padding[0], 2, '1st entry of padding') dim2_padding = conv_utils.normalize_tuple(padding[1], 2, '2nd entry of padding') dim3_padding = conv_utils.normalize_tuple(padding[2], 2, '3rd entry of padding') normalized_padding = (dim1_padding, dim2_padding, dim3_padding) else: raise ValueError( '`padding` should be either an int, a tuple of 3 ints ' '(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad), ' 'or a tuple of 3 tuples of 2 ints ' '((left_dim1_pad, right_dim1_pad),' ' (left_dim2_pad, right_dim2_pad),' ' (left_dim3_pad, right_dim2_pad)). ' 'Found: ' + str(padding)) super(ZeroPadding3D, self).__init__(normalized_padding, data_format, **kwargs) def call(self, inputs): return K.spatial_3d_padding(inputs, padding=self.padding, data_format=self.data_format) class _Cropping(Layer): """Abstract nD copping layer (private, used as implementation base). # Arguments cropping: A tuple of tuples of 2 ints. data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. 
`"channels_last"` corresponds to inputs with shape `(batch, ..., channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, ...)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". For Cropping1D, the data format is always `"channels_last"`. """ def __init__(self, cropping, data_format=None, **kwargs): super(_Cropping, self).__init__(**kwargs) # self.rank is 1 for Cropping1D, 2 for Cropping2D... self.rank = len(cropping) self.cropping = cropping self.data_format = K.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=2 + self.rank) def call(self, inputs): slices_dims = [] for start, end in self.cropping: if end == 0: end = None else: end = -end slices_dims.append(slice(start, end)) slices = [slice(None)] + slices_dims + [slice(None)] slices = tuple(slices) spatial_axes = list(range(1, 1 + self.rank)) slices = transpose_shape(slices, self.data_format, spatial_axes) return inputs[slices] def compute_output_shape(self, input_shape): cropping_all_dims = ((0, 0),) + self.cropping + ((0, 0),) spatial_axes = list(range(1, 1 + self.rank)) cropping_all_dims = transpose_shape(cropping_all_dims, self.data_format, spatial_axes) output_shape = list(input_shape) for dim in range(len(output_shape)): if output_shape[dim] is not None: output_shape[dim] -= sum(cropping_all_dims[dim]) return tuple(output_shape) def get_config(self): config = {'cropping': self.cropping, 'data_format': self.data_format} base_config = super(_Cropping, self).get_config() return dict(list(base_config.items()) + list(config.items())) class Cropping1D(_Cropping): """Cropping layer for 1D input (e.g. temporal sequence). It crops along the time dimension (axis 1). # Arguments cropping: int or tuple of int (length 2) How many units should be trimmed off at the beginning and end of the cropping dimension (axis 1). 
If a single int is provided, the same value will be used for both. # Input shape 3D tensor with shape `(batch, axis_to_crop, features)` # Output shape 3D tensor with shape `(batch, cropped_axis, features)` """ def __init__(self, cropping=(1, 1), **kwargs): normalized_cropping = (conv_utils.normalize_tuple(cropping, 2, 'cropping'),) super(Cropping1D, self).__init__(normalized_cropping, 'channels_last', **kwargs) def get_config(self): base_config = super(Cropping1D, self).get_config() base_config.pop('data_format') base_config['cropping'] = base_config['cropping'][0] return base_config class Cropping2D(_Cropping): """Cropping layer for 2D input (e.g. picture). It crops along spatial dimensions, i.e. height and width. # Arguments cropping: int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints. - If int: the same symmetric cropping is applied to height and width. - If tuple of 2 ints: interpreted as two different symmetric cropping values for height and width: `(symmetric_height_crop, symmetric_width_crop)`. - If tuple of 2 tuples of 2 ints: interpreted as `((top_crop, bottom_crop), (left_crop, right_crop))` data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, height, width, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, height, width)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". 
# Input shape 4D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, rows, cols, channels)` - If `data_format` is `"channels_first"`: `(batch, channels, rows, cols)` # Output shape 4D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, cropped_rows, cropped_cols, channels)` - If `data_format` is `"channels_first"`: `(batch, channels, cropped_rows, cropped_cols)` # Examples ```python # Crop the input 2D images or feature maps model = Sequential() model.add(Cropping2D(cropping=((2, 2), (4, 4)), input_shape=(28, 28, 3))) # now model.output_shape == (None, 24, 20, 3) model.add(Conv2D(64, (3, 3), padding='same')) model.add(Cropping2D(cropping=((2, 2), (2, 2)))) # now model.output_shape == (None, 20, 16, 64) ``` """ @interfaces.legacy_cropping2d_support def __init__(self, cropping=((0, 0), (0, 0)), data_format=None, **kwargs): if isinstance(cropping, int): normalized_cropping = ((cropping, cropping), (cropping, cropping)) elif hasattr(cropping, '__len__'): if len(cropping) != 2: raise ValueError('`cropping` should have two elements. ' 'Found: ' + str(cropping)) height_cropping = conv_utils.normalize_tuple( cropping[0], 2, '1st entry of cropping') width_cropping = conv_utils.normalize_tuple( cropping[1], 2, '2nd entry of cropping') normalized_cropping = (height_cropping, width_cropping) else: raise ValueError('`cropping` should be either an int, ' 'a tuple of 2 ints ' '(symmetric_height_crop, symmetric_width_crop), ' 'or a tuple of 2 tuples of 2 ints ' '((top_crop, bottom_crop), (left_crop, right_crop)). ' 'Found: ' + str(cropping)) super(Cropping2D, self).__init__(normalized_cropping, data_format, **kwargs) class Cropping3D(_Cropping): """Cropping layer for 3D data (e.g. spatial or spatio-temporal). # Arguments cropping: int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints. - If int: the same symmetric cropping is applied to depth, height, and width. 
- If tuple of 3 ints: interpreted as two different symmetric cropping values for depth, height, and width: `(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop)`. - If tuple of 3 tuples of 2 ints: interpreted as `((left_dim1_crop, right_dim1_crop), (left_dim2_crop, right_dim2_crop), (left_dim3_crop, right_dim3_crop))` data_format: A string, one of `"channels_last"` or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while `"channels_first"` corresponds to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". # Input shape 5D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, first_axis_to_crop, second_axis_to_crop, third_axis_to_crop, depth)` - If `data_format` is `"channels_first"`: `(batch, depth, first_axis_to_crop, second_axis_to_crop, third_axis_to_crop)` # Output shape 5D tensor with shape: - If `data_format` is `"channels_last"`: `(batch, first_cropped_axis, second_cropped_axis, third_cropped_axis, depth)` - If `data_format` is `"channels_first"`: `(batch, depth, first_cropped_axis, second_cropped_axis, third_cropped_axis)` """ @interfaces.legacy_cropping3d_support def __init__(self, cropping=((1, 1), (1, 1), (1, 1)), data_format=None, **kwargs): self.data_format = K.normalize_data_format(data_format) if isinstance(cropping, int): normalized_cropping = ((cropping, cropping), (cropping, cropping), (cropping, cropping)) elif hasattr(cropping, '__len__'): if len(cropping) != 3: raise ValueError('`cropping` should have 3 elements. 
' 'Found: ' + str(cropping)) dim1_cropping = conv_utils.normalize_tuple(cropping[0], 2, '1st entry of cropping') dim2_cropping = conv_utils.normalize_tuple(cropping[1], 2, '2nd entry of cropping') dim3_cropping = conv_utils.normalize_tuple(cropping[2], 2, '3rd entry of cropping') normalized_cropping = (dim1_cropping, dim2_cropping, dim3_cropping) else: raise ValueError( '`cropping` should be either an int, a tuple of 3 ints ' '(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop), ' 'or a tuple of 3 tuples of 2 ints ' '((left_dim1_crop, right_dim1_crop),' ' (left_dim2_crop, right_dim2_crop),' ' (left_dim3_crop, right_dim2_crop)). ' 'Found: ' + str(cropping)) super(Cropping3D, self).__init__(normalized_cropping, data_format, **kwargs) # Aliases Convolution1D = Conv1D Convolution2D = Conv2D Convolution3D = Conv3D SeparableConvolution1D = SeparableConv1D SeparableConvolution2D = SeparableConv2D Convolution2DTranspose = Conv2DTranspose Deconvolution2D = Deconv2D = Conv2DTranspose Deconvolution3D = Deconv3D = Conv3DTranspose # Legacy aliases AtrousConv1D = AtrousConvolution1D AtrousConv2D = AtrousConvolution2D Keras-2.2.4/keras/layers/recurrent.py0000644000000000116100000030763313354530144017326 0ustar rooteng00000000000000# -*- coding: utf-8 -*- """Recurrent layers and their base classes. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import warnings from .. import backend as K from .. import activations from .. import initializers from .. import regularizers from .. import constraints from ..engine.base_layer import Layer from ..engine.base_layer import InputSpec from ..utils.generic_utils import has_arg from ..utils.generic_utils import to_list # Legacy support. from ..legacy.layers import Recurrent from ..legacy import interfaces class StackedRNNCells(Layer): """Wrapper allowing a stack of RNN cells to behave as a single cell. Used to implement efficient stacked RNNs. 
# Arguments cells: List of RNN cell instances. # Examples ```python cells = [ keras.layers.LSTMCell(output_dim), keras.layers.LSTMCell(output_dim), keras.layers.LSTMCell(output_dim), ] inputs = keras.Input((timesteps, input_dim)) x = keras.layers.RNN(cells)(inputs) ``` """ def __init__(self, cells, **kwargs): for cell in cells: if not hasattr(cell, 'call'): raise ValueError('All cells must have a `call` method. ' 'received cells:', cells) if not hasattr(cell, 'state_size'): raise ValueError('All cells must have a ' '`state_size` attribute. ' 'received cells:', cells) self.cells = cells # reverse_state_order determines whether the state size will be in a # reverse order of the cells' state. User might want to set this to True # to keep the existing behavior. This is only useful when use # `RNN(return_state=True)` since the state will be returned as the same # order of state_size. self.reverse_state_order = kwargs.pop('reverse_state_order', False) if self.reverse_state_order: warnings.warn('`reverse_state_order=True` in `StackedRNNCells` ' 'will soon be deprecated. Please update the code to ' 'work with the natural order of states if you ' 'reply on the RNN states, ' 'eg `RNN(return_state=True)`.') super(StackedRNNCells, self).__init__(**kwargs) @property def state_size(self): # States are a flat list of the individual cell state size. # e.g. states of a 2-layer LSTM would be `[h1, c1, h2, c2]`. # (assuming one LSTM has states [h, c]) # In the case of reverse_state_order=True, the state_size will be # `[h2, c2, h1, c1]`. 
state_size = [] for cell in self.cells[::-1] if self.reverse_state_order else self.cells: if hasattr(cell.state_size, '__len__'): state_size += list(cell.state_size) else: state_size.append(cell.state_size) return tuple(state_size) @property def output_size(self): if getattr(self.cells[-1], 'output_size', None) is not None: return self.cells[-1].output_size if hasattr(self.cells[-1].state_size, '__len__'): return self.cells[-1].state_size[0] else: return self.cells[-1].state_size def call(self, inputs, states, constants=None, **kwargs): # Recover per-cell states. nested_states = [] for cell in self.cells[::-1] if self.reverse_state_order else self.cells: if hasattr(cell.state_size, '__len__'): nested_states.append(states[:len(cell.state_size)]) states = states[len(cell.state_size):] else: nested_states.append([states[0]]) states = states[1:] if self.reverse_state_order: nested_states = nested_states[::-1] # Call the cells in order and store the returned states. new_nested_states = [] for cell, states in zip(self.cells, nested_states): if has_arg(cell.call, 'constants'): inputs, states = cell.call(inputs, states, constants=constants, **kwargs) else: inputs, states = cell.call(inputs, states, **kwargs) new_nested_states.append(states) # Format the new states as a flat list # in reverse cell order. 
new_states = [] if self.reverse_state_order: new_nested_states = new_nested_states[::-1] for cell_states in new_nested_states: new_states += cell_states return inputs, new_states def build(self, input_shape): if isinstance(input_shape, list): constants_shape = input_shape[1:] input_shape = input_shape[0] for cell in self.cells: if isinstance(cell, Layer): if has_arg(cell.call, 'constants'): cell.build([input_shape] + constants_shape) else: cell.build(input_shape) if getattr(cell, 'output_size', None) is not None: output_dim = cell.output_size elif hasattr(cell.state_size, '__len__'): output_dim = cell.state_size[0] else: output_dim = cell.state_size input_shape = (input_shape[0], output_dim) self.built = True def get_config(self): cells = [] for cell in self.cells: cells.append({'class_name': cell.__class__.__name__, 'config': cell.get_config()}) config = {'cells': cells} base_config = super(StackedRNNCells, self).get_config() return dict(list(base_config.items()) + list(config.items())) @classmethod def from_config(cls, config, custom_objects=None): from . import deserialize as deserialize_layer cells = [] for cell_config in config.pop('cells'): cells.append(deserialize_layer(cell_config, custom_objects=custom_objects)) return cls(cells, **config) @property def trainable_weights(self): if not self.trainable: return [] weights = [] for cell in self.cells: if isinstance(cell, Layer): weights += cell.trainable_weights return weights @property def non_trainable_weights(self): weights = [] for cell in self.cells: if isinstance(cell, Layer): weights += cell.non_trainable_weights if not self.trainable: trainable_weights = [] for cell in self.cells: if isinstance(cell, Layer): trainable_weights += cell.trainable_weights return trainable_weights + weights return weights def get_weights(self): """Retrieves the weights of the model. # Returns A flat list of Numpy arrays. 
""" weights = [] for cell in self.cells: if isinstance(cell, Layer): weights += cell.weights return K.batch_get_value(weights) def set_weights(self, weights): """Sets the weights of the model. # Arguments weights: A list of Numpy arrays with shapes and types matching the output of `model.get_weights()`. """ tuples = [] for cell in self.cells: if isinstance(cell, Layer): num_param = len(cell.weights) weights = weights[:num_param] for sw, w in zip(cell.weights, weights): tuples.append((sw, w)) weights = weights[num_param:] K.batch_set_value(tuples) @property def losses(self): losses = [] for cell in self.cells: if isinstance(cell, Layer): cell_losses = cell.losses losses += cell_losses return losses def get_losses_for(self, inputs=None): losses = [] for cell in self.cells: if isinstance(cell, Layer): cell_losses = cell.get_losses_for(inputs) losses += cell_losses return losses class RNN(Layer): """Base class for recurrent layers. # Arguments cell: A RNN cell instance. A RNN cell is a class that has: - a `call(input_at_t, states_at_t)` method, returning `(output_at_t, states_at_t_plus_1)`. The call method of the cell can also take the optional argument `constants`, see section "Note on passing external constants" below. - a `state_size` attribute. This can be a single integer (single state) in which case it is the size of the recurrent state (which should be the same as the size of the cell output). This can also be a list/tuple of integers (one size per state). - a `output_size` attribute. This can be a single integer or a TensorShape, which represent the shape of the output. For backward compatible reason, if this attribute is not available for the cell, the value will be inferred by the first element of the `state_size`. It is also possible for `cell` to be a list of RNN cell instances, in which cases the cells get stacked on after the other in the RNN, implementing an efficient stacked RNN. return_sequences: Boolean. 
Whether to return the last output in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence. stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used. Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences. input_dim: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model. input_length: Length of input sequences, to be specified when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers upstream (without it, the shape of the dense outputs cannot be computed). Note that if the recurrent layer is not the first layer in your model, you would need to specify the input length at the level of the first layer (e.g. via the `input_shape` argument) # Input shape 3D tensor with shape `(batch_size, timesteps, input_dim)`. # Output shape - if `return_state`: a list of tensors. The first tensor is the output. The remaining tensors are the last states, each with shape `(batch_size, units)`. - if `return_sequences`: 3D tensor with shape `(batch_size, timesteps, units)`. - else, 2D tensor with shape `(batch_size, units)`. # Masking This layer supports masking for input data with a variable number of timesteps. To introduce masks to your data, use an [Embedding](embeddings.md) layer with the `mask_zero` parameter set to `True`. 
# Note on using statefulness in RNNs You can set RNN layers to be 'stateful', which means that the states computed for the samples in one batch will be reused as initial states for the samples in the next batch. This assumes a one-to-one mapping between samples in different successive batches. To enable statefulness: - specify `stateful=True` in the layer constructor. - specify a fixed batch size for your model, by passing if sequential model: `batch_input_shape=(...)` to the first layer in your model. else for functional model with 1 or more Input layers: `batch_shape=(...)` to all the first layers in your model. This is the expected shape of your inputs *including the batch size*. It should be a tuple of integers, e.g. `(32, 10, 100)`. - specify `shuffle=False` when calling fit(). To reset the states of your model, call `.reset_states()` on either a specific layer, or on your entire model. # Note on specifying the initial state of RNNs You can specify the initial state of RNN layers symbolically by calling them with the keyword argument `initial_state`. The value of `initial_state` should be a tensor or list of tensors representing the initial state of the RNN layer. You can specify the initial state of RNN layers numerically by calling `reset_states` with the keyword argument `states`. The value of `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. # Note on passing external constants to RNNs You can pass "external" constants to the cell using the `constants` keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This requires that the `cell.call` method accepts the same keyword argument `constants`. Such constants can be used to condition the cell transformation on additional static inputs (not changing over time), a.k.a. an attention mechanism. # Examples ```python # First, let's define a RNN Cell, as a layer subclass. 
def __init__(self, cell,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             unroll=False,
             **kwargs):
    """Wrap a cell (or a stack of cells) into a recurrent layer.

    # Arguments
        cell: A RNN cell instance. It must expose a `call` method and a
            `state_size` attribute. A list or tuple of cells is wrapped
            into a single `StackedRNNCells` instance.
        return_sequences: Boolean. Whether to return the last output
            in the output sequence, or the full sequence.
        return_state: Boolean. Whether to return the last state
            in addition to the output.
        go_backwards: Boolean (default False).
            If True, process the input sequence backwards.
        stateful: Boolean (default False). If True, the last state
            of each sample is reused as initial state for the sample
            of the same index in the following batch.
        unroll: Boolean (default False).
            If True, the network will be unrolled.
        **kwargs: Forwarded untouched to the parent class constructor.

    # Raises
        ValueError: If `cell` has no `call` method,
            or no `state_size` attribute.
    """
    if isinstance(cell, (list, tuple)):
        cell = StackedRNNCells(cell)
    if not hasattr(cell, 'call'):
        # Embed the offending object in the message itself. Passing it
        # as a second positional argument to `ValueError` made the error
        # render as a raw `(message, cell)` tuple.
        raise ValueError('`cell` should have a `call` method. '
                         'The RNN was passed: ' + str(cell))
    if not hasattr(cell, 'state_size'):
        raise ValueError('The RNN cell should have '
                         'an attribute `state_size` '
                         '(tuple of integers, '
                         'one integer per RNN state).')
    super(RNN, self).__init__(**kwargs)
    self.cell = cell
    self.return_sequences = return_sequences
    self.return_state = return_state
    self.go_backwards = go_backwards
    self.stateful = stateful
    self.unroll = unroll

    self.supports_masking = True
    # Only the sequence input is described here; the specs for initial
    # states and constants start out unset.
    self.input_spec = [InputSpec(ndim=3)]
    self.state_spec = None
    self._states = None
    self.constants_spec = None
    self._num_constants = None
if self._num_constants is not None: constants_shape = input_shape[-self._num_constants:] else: constants_shape = None if isinstance(input_shape, list): input_shape = input_shape[0] batch_size = input_shape[0] if self.stateful else None input_dim = input_shape[-1] self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim)) # allow cell (if layer) to build before we set or validate state_spec if isinstance(self.cell, Layer): step_input_shape = (input_shape[0],) + input_shape[2:] if constants_shape is not None: self.cell.build([step_input_shape] + constants_shape) else: self.cell.build(step_input_shape) # set or validate state_spec if hasattr(self.cell.state_size, '__len__'): state_size = list(self.cell.state_size) else: state_size = [self.cell.state_size] if self.state_spec is not None: # initial_state was passed in call, check compatibility if [spec.shape[-1] for spec in self.state_spec] != state_size: raise ValueError( 'An `initial_state` was passed that is not compatible with ' '`cell.state_size`. 
def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
    """Call the layer, optionally with an initial state and constants.

    The arguments are first normalized by `_standardize_args`. When
    neither `initial_state` nor `constants` is given, this is a plain
    parent-class call. Otherwise the extra tensors are recorded in
    `state_spec` / `constants_spec` and forwarded to `call` through
    `kwargs`. If they are Keras tensors they are additionally appended
    to the layer inputs, with `input_spec` extended for the duration
    of the call.

    # Arguments
        inputs: Input tensor, or list of tensors.
        initial_state: Tensor or list of tensors, or None.
        constants: Tensor or list of tensors, or None.
        **kwargs: Forwarded to the parent class `__call__`.

    # Returns
        Whatever the parent class `__call__` returns.

    # Raises
        ValueError: If `initial_state` and `constants` mix
            Keras tensors and non-Keras tensors.
    """
    inputs, initial_state, constants = _standardize_args(
        inputs, initial_state, constants, self._num_constants)

    if initial_state is None and constants is None:
        return super(RNN, self).__call__(inputs, **kwargs)

    # If any of `initial_state` or `constants` are specified and are Keras
    # tensors, then add them to the inputs and temporarily modify the
    # input_spec to include them.

    additional_inputs = []
    additional_specs = []
    if initial_state is not None:
        kwargs['initial_state'] = initial_state
        additional_inputs += initial_state
        self.state_spec = [InputSpec(shape=K.int_shape(state))
                           for state in initial_state]
        additional_specs += self.state_spec
    if constants is not None:
        kwargs['constants'] = constants
        additional_inputs += constants
        self.constants_spec = [InputSpec(shape=K.int_shape(constant))
                               for constant in constants]
        self._num_constants = len(constants)
        additional_specs += self.constants_spec
    # at this point additional_inputs cannot be empty
    is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
    for tensor in additional_inputs:
        if K.is_keras_tensor(tensor) != is_keras_tensor:
            raise ValueError('The initial state or constants of an RNN'
                             ' layer cannot be specified with a mix of'
                             ' Keras tensors and non-Keras tensors'
                             ' (a "Keras tensor" is a tensor that was'
                             ' returned by a Keras layer, or by `Input`)')

    if is_keras_tensor:
        # Compute the full input spec, including state and constants
        full_input = [inputs] + additional_inputs
        full_input_spec = self.input_spec + additional_specs
        # Perform the call with temporarily replaced input_spec
        original_input_spec = self.input_spec
        self.input_spec = full_input_spec
        try:
            return super(RNN, self).__call__(full_input, **kwargs)
        finally:
            # Restore the spec even when the parent call raises, so a
            # failed call does not leave the layer expecting the extra
            # state/constant inputs on every later call.
            self.input_spec = original_input_spec
    else:
        return super(RNN, self).__call__(inputs, **kwargs)
if self._num_constants is None: initial_state = inputs[1:] else: initial_state = inputs[1:-self._num_constants] if len(initial_state) == 0: initial_state = None inputs = inputs[0] if initial_state is not None: pass elif self.stateful: initial_state = self.states else: initial_state = self.get_initial_state(inputs) if isinstance(mask, list): mask = mask[0] if len(initial_state) != len(self.states): raise ValueError('Layer has ' + str(len(self.states)) + ' states but was passed ' + str(len(initial_state)) + ' initial states.') input_shape = K.int_shape(inputs) timesteps = input_shape[1] if self.unroll and timesteps in [None, 1]: raise ValueError('Cannot unroll a RNN if the ' 'time dimension is undefined or equal to 1. \n' '- If using a Sequential model, ' 'specify the time dimension by passing ' 'an `input_shape` or `batch_input_shape` ' 'argument to your first layer. If your ' 'first layer is an Embedding, you can ' 'also use the `input_length` argument.\n' '- If using the functional API, specify ' 'the time dimension by passing a `shape` ' 'or `batch_shape` argument to your Input layer.') kwargs = {} if has_arg(self.cell.call, 'training'): kwargs['training'] = training if constants: if not has_arg(self.cell.call, 'constants'): raise ValueError('RNN cell does not support constants') def step(inputs, states): constants = states[-self._num_constants:] states = states[:-self._num_constants] return self.cell.call(inputs, states, constants=constants, **kwargs) else: def step(inputs, states): return self.cell.call(inputs, states, **kwargs) last_output, outputs, states = K.rnn(step, inputs, initial_state, constants=constants, go_backwards=self.go_backwards, mask=mask, unroll=self.unroll, input_length=timesteps) if self.stateful: updates = [] for i in range(len(states)): updates.append((self.states[i], states[i])) self.add_update(updates, inputs) if self.return_sequences: output = outputs else: output = last_output # Properly set learning phase if getattr(last_output, 
'_uses_learning_phase', False): output._uses_learning_phase = True for state in states: state._uses_learning_phase = True if self.return_state: states = to_list(states, allow_tuple=True) return [output] + states else: return output def reset_states(self, states=None): if not self.stateful: raise AttributeError('Layer must be stateful.') batch_size = self.input_spec[0].shape[0] if not batch_size: raise ValueError('If a RNN is stateful, it needs to know ' 'its batch size. Specify the batch size ' 'of your input tensors: \n' '- If using a Sequential model, ' 'specify the batch size by passing ' 'a `batch_input_shape` ' 'argument to your first layer.\n' '- If using the functional API, specify ' 'the batch size by passing a ' '`batch_shape` argument to your Input layer.') # initialize state if None if self.states[0] is None: if hasattr(self.cell.state_size, '__len__'): self.states = [K.zeros((batch_size, dim)) for dim in self.cell.state_size] else: self.states = [K.zeros((batch_size, self.cell.state_size))] elif states is None: if hasattr(self.cell.state_size, '__len__'): for state, dim in zip(self.states, self.cell.state_size): K.set_value(state, np.zeros((batch_size, dim))) else: K.set_value(self.states[0], np.zeros((batch_size, self.cell.state_size))) else: states = to_list(states, allow_tuple=True) if len(states) != len(self.states): raise ValueError('Layer ' + self.name + ' expects ' + str(len(self.states)) + ' states, ' 'but it received ' + str(len(states)) + ' state values. Input received: ' + str(states)) for index, (value, state) in enumerate(zip(states, self.states)): if hasattr(self.cell.state_size, '__len__'): dim = self.cell.state_size[index] else: dim = self.cell.state_size if value.shape != (batch_size, dim): raise ValueError('State ' + str(index) + ' is incompatible with layer ' + self.name + ': expected shape=' + str((batch_size, dim)) + ', found shape=' + str(value.shape)) # TODO: consider batch calls to `set_value`. 
class SimpleRNNCell(Layer):
    """Cell class for SimpleRNN.

    # Arguments
        units: Positive integer, dimensionality of the output space.
        activation: Activation function to use
            (see [activations](../activations.md)).
            Default: hyperbolic tangent (`tanh`).
            If you pass `None`, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix,
            used for the linear transformation of the inputs
            (see [initializers](../initializers.md)).
        recurrent_initializer: Initializer for the `recurrent_kernel`
            weights matrix,
            used for the linear transformation of the recurrent state
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        recurrent_regularizer: Regularizer function applied to
            the `recurrent_kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to
            the `kernel` weights matrix
            (see [constraints](../constraints.md)).
        recurrent_constraint: Constraint function applied to
            the `recurrent_kernel` weights matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).
        dropout: Float between 0 and 1.
            Fraction of the units to drop for
            the linear transformation of the inputs.
            Values outside that range are clamped to it.
        recurrent_dropout: Float between 0 and 1.
            Fraction of the units to drop for
            the linear transformation of the recurrent state.
            Values outside that range are clamped to it.
    """

    def __init__(self, units,
                 activation='tanh',
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 recurrent_regularizer=None,
                 bias_regularizer=None,
                 kernel_constraint=None,
                 recurrent_constraint=None,
                 bias_constraint=None,
                 dropout=0.,
                 recurrent_dropout=0.,
                 **kwargs):
        super(SimpleRNNCell, self).__init__(**kwargs)

        # Sizes: a single state, as wide as the output.
        self.units = units
        self.state_size = self.units
        self.output_size = self.units

        self.activation = activations.get(activation)
        self.use_bias = use_bias

        # Resolve identifiers into initializer/regularizer/constraint objects.
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)

        self.kernel_constraint = constraints.get(kernel_constraint)
        self.recurrent_constraint = constraints.get(recurrent_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

        # Dropout rates are clamped into [0, 1]; the masks are created
        # lazily on the first `call` that needs them.
        self.dropout = min(1., max(0., dropout))
        self.recurrent_dropout = min(1., max(0., recurrent_dropout))
        self._dropout_mask = None
        self._recurrent_dropout_mask = None

    def build(self, input_shape):
        """Create the kernel, recurrent kernel and (optional) bias weights."""
        input_dim = input_shape[-1]
        self.kernel = self.add_weight(
            shape=(input_dim, self.units),
            name='kernel',
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint)
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            name='recurrent_kernel',
            initializer=self.recurrent_initializer,
            regularizer=self.recurrent_regularizer,
            constraint=self.recurrent_constraint)
        if not self.use_bias:
            self.bias = None
        else:
            self.bias = self.add_weight(
                shape=(self.units,),
                name='bias',
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint)
        self.built = True

    def call(self, inputs, states, training=None):
        """Run one time step.

        # Arguments
            inputs: Input tensor for the current step.
            states: List of state tensors; only `states[0]`
                (the previous output) is read.
            training: Forwarded to the dropout mask generation. When it
                is `None` and any dropout is configured, the output is
                flagged as depending on the learning phase.

        # Returns
            A tuple `(output, [output])`: the step output, and the
            new state list holding that same tensor.
        """
        prev_output = states[0]

        # Masks are generated once and then reused by later steps.
        if self._dropout_mask is None and 0 < self.dropout < 1:
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training)
        if (self._recurrent_dropout_mask is None and
                0 < self.recurrent_dropout < 1):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(prev_output),
                self.recurrent_dropout,
                training=training)

        input_mask = self._dropout_mask
        recurrent_mask = self._recurrent_dropout_mask

        # Input contribution.
        step_inputs = inputs if input_mask is None else inputs * input_mask
        h = K.dot(step_inputs, self.kernel)
        if self.bias is not None:
            h = K.bias_add(h, self.bias)

        # Recurrent contribution.
        if recurrent_mask is not None:
            prev_output *= recurrent_mask
        output = h + K.dot(prev_output, self.recurrent_kernel)
        if self.activation is not None:
            output = self.activation(output)

        # Properly set learning phase on output tensor.
        if training is None and 0 < self.dropout + self.recurrent_dropout:
            output._uses_learning_phase = True
        return output, [output]

    def get_config(self):
        """Return the cell configuration merged over the parent config."""
        merged = dict(super(SimpleRNNCell, self).get_config())
        merged.update({
            'units': self.units,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer':
                initializers.serialize(self.kernel_initializer),
            'recurrent_initializer':
                initializers.serialize(self.recurrent_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'kernel_regularizer':
                regularizers.serialize(self.kernel_regularizer),
            'recurrent_regularizer':
                regularizers.serialize(self.recurrent_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'recurrent_constraint':
                constraints.serialize(self.recurrent_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint),
            'dropout': self.dropout,
            'recurrent_dropout': self.recurrent_dropout,
        })
        return merged
# Arguments units: Positive integer, dimensionality of the output space. activation: Activation function to use (see [activations](../activations.md)). Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, used for the linear transformation of the recurrent state (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to the `recurrent_kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the `kernel` weights matrix (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to the `recurrent_kernel` weights matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. return_state: Boolean. 
@interfaces.legacy_recurrent_support
def __init__(self, units,
             activation='tanh',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             bias_initializer='zeros',
             kernel_regularizer=None,
             recurrent_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             recurrent_constraint=None,
             bias_constraint=None,
             dropout=0.,
             recurrent_dropout=0.,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             unroll=False,
             **kwargs):
    """Build a `SimpleRNNCell` and wrap it in the recurrent layer.

    The cell-level options (activation, initializers, regularizers,
    constraints, dropout rates) go to `SimpleRNNCell`; the sequence-level
    options and any remaining `kwargs` go to the parent constructor.
    A deprecated `implementation` keyword is dropped with a warning, and
    on the Theano backend both dropout rates are forced to 0 with a
    warning.
    """
    # Deprecated keyword: discard it, but tell the user.
    if 'implementation' in kwargs:
        del kwargs['implementation']
        warnings.warn('The `implementation` argument '
                      'in `SimpleRNN` has been deprecated. '
                      'Please remove it from your layer call.')

    if K.backend() == 'theano':
        if dropout or recurrent_dropout:
            warnings.warn(
                'RNN dropout is no longer supported with the Theano backend '
                'due to technical limitations. '
                'You can either set `dropout` and `recurrent_dropout` to 0, '
                'or use the TensorFlow backend.')
            dropout = 0.
            recurrent_dropout = 0.

    # Options consumed by the cell.
    cell_options = dict(
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout)
    cell = SimpleRNNCell(units, **cell_options)

    # Options consumed by the wrapping recurrent layer.
    layer_options = dict(
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        unroll=unroll)
    layer_options.update(kwargs)
    super(SimpleRNN, self).__init__(cell, **layer_options)

    # Set after the parent constructor has run.
    self.activity_regularizer = regularizers.get(activity_regularizer)
class GRUCell(Layer):
    """Cell class for the GRU layer.

    # Arguments
        units: Positive integer, dimensionality of the output space.
        activation: Activation function to use
            (see [activations](../activations.md)).
            Default: hyperbolic tangent (`tanh`).
            If you pass `None`, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        recurrent_activation: Activation function to use
            for the recurrent step
            (see [activations](../activations.md)).
            Default: hard sigmoid (`hard_sigmoid`).
            If you pass `None`, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix,
            used for the linear transformation of the inputs
            (see [initializers](../initializers.md)).
        recurrent_initializer: Initializer for the `recurrent_kernel`
            weights matrix,
            used for the linear transformation of the recurrent state
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        recurrent_regularizer: Regularizer function applied to
            the `recurrent_kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to
            the `kernel` weights matrix
            (see [constraints](../constraints.md)).
        recurrent_constraint: Constraint function applied to
            the `recurrent_kernel` weights matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).
        dropout: Float between 0 and 1.
            Fraction of the units to drop for
            the linear transformation of the inputs.
        recurrent_dropout: Float between 0 and 1.
            Fraction of the units to drop for
            the linear transformation of the recurrent state.
        implementation: Implementation mode, either 1 or 2.
            Mode 1 will structure its operations as a larger number of
            smaller dot products and additions, whereas mode 2 will
            batch them into fewer, larger operations. These modes will
            have different performance profiles on different hardware and
            for different applications.
        reset_after: GRU convention (whether to apply reset gate after or
            before matrix multiplication). False = "before" (default),
            True = "after" (CuDNN compatible).
    """

    def __init__(self, units,
                 activation='tanh',
                 recurrent_activation='hard_sigmoid',
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 recurrent_regularizer=None,
                 bias_regularizer=None,
                 kernel_constraint=None,
                 recurrent_constraint=None,
                 bias_constraint=None,
                 dropout=0.,
                 recurrent_dropout=0.,
                 implementation=1,
                 reset_after=False,
                 **kwargs):
        """Store the cell hyper-parameters; no weights are created here.

        Identifiers are resolved through `activations.get`,
        `initializers.get`, `regularizers.get` and `constraints.get`.
        Remaining `kwargs` are forwarded to the parent constructor.
        """
        super(GRUCell, self).__init__(**kwargs)
        self.units = units
        self.activation = activations.get(activation)
        self.recurrent_activation = activations.get(recurrent_activation)
        self.use_bias = use_bias

        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)

        self.kernel_constraint = constraints.get(kernel_constraint)
        self.recurrent_constraint = constraints.get(recurrent_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

        # Clamp both rates into [0, 1].
        self.dropout = min(1., max(0., dropout))
        self.recurrent_dropout = min(1., max(0., recurrent_dropout))
        self.implementation = implementation
        self.reset_after = reset_after
        # Single state, as wide as the output.
        self.state_size = self.units
        self.output_size = self.units
        # Dropout masks are created lazily in `call` and cached here.
        self._dropout_mask = None
        self._recurrent_dropout_mask = None

    def build(self, input_shape):
        """Create the weights and the per-gate slices used by `call`.

        `kernel` has shape `(input_dim, 3 * units)` and `recurrent_kernel`
        has shape `(units, 3 * units)`; the three column blocks are, in
        order, the update gate (`z`), the reset gate (`r`) and the new
        gate (`h`). With `use_bias`, `bias` has shape `(3 * units,)`, or
        `(2, 3 * units)` when `reset_after` is set (row 0 for the input
        kernel, row 1 for the recurrent kernel).

        # Arguments
            input_shape: Shape of the step input; only its last entry
                (the input dimension) is read.
        """
        input_dim = input_shape[-1]
        self.kernel = self.add_weight(shape=(input_dim, self.units * 3),
                                      name='kernel',
                                      initializer=self.kernel_initializer,
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units * 3),
            name='recurrent_kernel',
            initializer=self.recurrent_initializer,
            regularizer=self.recurrent_regularizer,
            constraint=self.recurrent_constraint)

        if self.use_bias:
            if not self.reset_after:
                bias_shape = (3 * self.units,)
            else:
                # separate biases for input and recurrent kernels
                # Note: the shape is intentionally different from CuDNNGRU biases
                # `(2 * 3 * self.units,)`, so that we can distinguish the classes
                # when loading and converting saved weights.
                bias_shape = (2, 3 * self.units)
            self.bias = self.add_weight(shape=bias_shape,
                                        name='bias',
                                        initializer=self.bias_initializer,
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
            if not self.reset_after:
                self.input_bias, self.recurrent_bias = self.bias, None
            else:
                # NOTE: need to flatten, since slicing in CNTK gives 2D array
                self.input_bias = K.flatten(self.bias[0])
                self.recurrent_bias = K.flatten(self.bias[1])
        else:
            # Note that `input_bias` / `recurrent_bias` are not assigned on
            # this path; `call` only reads them when `use_bias` is true.
            self.bias = None

        # update gate
        self.kernel_z = self.kernel[:, :self.units]
        self.recurrent_kernel_z = self.recurrent_kernel[:, :self.units]
        # reset gate
        self.kernel_r = self.kernel[:, self.units: self.units * 2]
        self.recurrent_kernel_r = self.recurrent_kernel[:,
                                                        self.units:
                                                        self.units * 2]
        # new gate
        self.kernel_h = self.kernel[:, self.units * 2:]
        self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2:]

        if self.use_bias:
            # bias for inputs
            self.input_bias_z = self.input_bias[:self.units]
            self.input_bias_r = self.input_bias[self.units: self.units * 2]
            self.input_bias_h = self.input_bias[self.units * 2:]
            # bias for hidden state - just for compatibility with CuDNN
            if self.reset_after:
                self.recurrent_bias_z = self.recurrent_bias[:self.units]
                self.recurrent_bias_r = (
                    self.recurrent_bias[self.units: self.units * 2])
                self.recurrent_bias_h = self.recurrent_bias[self.units * 2:]
        else:
            self.input_bias_z = None
            self.input_bias_r = None
            self.input_bias_h = None
            if self.reset_after:
                self.recurrent_bias_z = None
                self.recurrent_bias_r = None
                self.recurrent_bias_h = None
        self.built = True

    def call(self, inputs, states, training=None):
        """Run one GRU time step.

        # Arguments
            inputs: Input tensor for the current step.
            states: List of state tensors; only `states[0]`
                (the previous hidden state) is read.
            training: Forwarded to the dropout mask generation. When it
                is `None` and any dropout is configured, the output is
                flagged as depending on the learning phase.

        # Returns
            A tuple `(h, [h])`: the new hidden state, and the
            new state list holding that same tensor.
        """
        h_tm1 = states[0]  # previous memory

        # Masks are created once and cached on the cell; a rate of exactly
        # 0 or 1 does not create a mask.
        # `count=3` presumably yields one mask per gate (they are indexed
        # 0..2 below) - confirm against `_generate_dropout_mask`.
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training,
                count=3)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(h_tm1),
                self.recurrent_dropout,
                training=training,
                count=3)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        if self.implementation == 1:
            # Mode 1: one dot product per gate, each gate with its own mask.
            if 0. < self.dropout < 1.:
                inputs_z = inputs * dp_mask[0]
                inputs_r = inputs * dp_mask[1]
                inputs_h = inputs * dp_mask[2]
            else:
                inputs_z = inputs
                inputs_r = inputs
                inputs_h = inputs

            x_z = K.dot(inputs_z, self.kernel_z)
            x_r = K.dot(inputs_r, self.kernel_r)
            x_h = K.dot(inputs_h, self.kernel_h)
            if self.use_bias:
                x_z = K.bias_add(x_z, self.input_bias_z)
                x_r = K.bias_add(x_r, self.input_bias_r)
                x_h = K.bias_add(x_h, self.input_bias_h)

            if 0. < self.recurrent_dropout < 1.:
                h_tm1_z = h_tm1 * rec_dp_mask[0]
                h_tm1_r = h_tm1 * rec_dp_mask[1]
                h_tm1_h = h_tm1 * rec_dp_mask[2]
            else:
                h_tm1_z = h_tm1
                h_tm1_r = h_tm1
                h_tm1_h = h_tm1

            recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
            recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)
            # The recurrent bias only exists in the `reset_after` variant.
            if self.reset_after and self.use_bias:
                recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias_z)
                recurrent_r = K.bias_add(recurrent_r, self.recurrent_bias_r)

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)

            # reset gate applied after/before matrix multiplication
            if self.reset_after:
                recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel_h)
                if self.use_bias:
                    recurrent_h = K.bias_add(recurrent_h, self.recurrent_bias_h)
                recurrent_h = r * recurrent_h
            else:
                recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)

            hh = self.activation(x_h + recurrent_h)
        else:
            # Any other mode: fused projections; only the first mask of
            # each set is used, shared by all three gates.
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]

            # inputs projected by all gate matrices at once
            matrix_x = K.dot(inputs, self.kernel)
            if self.use_bias:
                # biases: bias_z_i, bias_r_i, bias_h_i
                matrix_x = K.bias_add(matrix_x, self.input_bias)
            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units: 2 * self.units]
            x_h = matrix_x[:, 2 * self.units:]

            # NOTE(review): this rebinds `h_tm1` itself, so the final
            # mixing step below uses the masked state on this path, while
            # mode 1 mixes with the unmasked state. Confirm whether that
            # difference is intended.
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]

            if self.reset_after:
                # hidden state projected by all gate matrices at once
                matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
                if self.use_bias:
                    matrix_inner = K.bias_add(matrix_inner, self.recurrent_bias)
            else:
                # hidden state projected separately for update/reset and new
                matrix_inner = K.dot(h_tm1,
                                     self.recurrent_kernel[:, :2 * self.units])

            recurrent_z = matrix_inner[:, :self.units]
            recurrent_r = matrix_inner[:, self.units: 2 * self.units]

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)

            if self.reset_after:
                recurrent_h = r * matrix_inner[:, 2 * self.units:]
            else:
                recurrent_h = K.dot(r * h_tm1,
                                    self.recurrent_kernel[:, 2 * self.units:])

            hh = self.activation(x_h + recurrent_h)

        # previous and candidate state mixed by update gate
        h = z * h_tm1 + (1 - z) * hh

        # Flag the output as learning-phase dependent when dropout is
        # configured and no explicit `training` value was given.
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True

        return h, [h]

    def get_config(self):
        """Return the cell configuration merged over the parent config.

        Activations, initializers, regularizers and constraints are stored
        in their serialized form. Keys from this class override keys of
        the same name in the parent config.
        """
        config = {'units': self.units,
                  'activation': activations.serialize(self.activation),
                  'recurrent_activation':
                      activations.serialize(self.recurrent_activation),
                  'use_bias': self.use_bias,
                  'kernel_initializer':
                      initializers.serialize(self.kernel_initializer),
                  'recurrent_initializer':
                      initializers.serialize(self.recurrent_initializer),
                  'bias_initializer': initializers.serialize(self.bias_initializer),
                  'kernel_regularizer':
                      regularizers.serialize(self.kernel_regularizer),
                  'recurrent_regularizer':
                      regularizers.serialize(self.recurrent_regularizer),
                  'bias_regularizer': regularizers.serialize(self.bias_regularizer),
                  'kernel_constraint': constraints.serialize(self.kernel_constraint),
                  'recurrent_constraint':
                      constraints.serialize(self.recurrent_constraint),
                  'bias_constraint': constraints.serialize(self.bias_constraint),
                  'dropout': self.dropout,
                  'recurrent_dropout': self.recurrent_dropout,
                  'implementation': self.implementation,
                  'reset_after': self.reset_after}
        base_config = super(GRUCell, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to the `recurrent_kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the `kernel` weights matrix (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to the `recurrent_kernel` weights matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. implementation: Implementation mode, either 1 or 2. Mode 1 will structure its operations as a larger number of smaller dot products and additions, whereas mode 2 will batch them into fewer, larger operations. These modes will have different performance profiles on different hardware and for different applications. return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence. stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used. 
@interfaces.legacy_recurrent_support
def __init__(self, units,
             activation='tanh',
             recurrent_activation='hard_sigmoid',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             bias_initializer='zeros',
             kernel_regularizer=None,
             recurrent_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             recurrent_constraint=None,
             bias_constraint=None,
             dropout=0.,
             recurrent_dropout=0.,
             implementation=1,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             unroll=False,
             reset_after=False,
             **kwargs):
    # Cell-level arguments are used to build a `GRUCell`; sequence-level
    # arguments (`return_sequences`, `return_state`, `go_backwards`,
    # `stateful`, `unroll`) and `**kwargs` go to the `RNN` constructor.
    if implementation == 0:
        warnings.warn('`implementation=0` has been deprecated, '
                      'and now defaults to `implementation=1`. '
                      'Please update your layer call.')
        # Do what the warning says. The cell only distinguishes
        # `implementation == 1` from everything else, so forwarding 0 would
        # select the batched code path instead of mode 1.
        implementation = 1
    if K.backend() == 'theano' and (dropout or recurrent_dropout):
        warnings.warn(
            'RNN dropout is no longer supported with the Theano backend '
            'due to technical limitations. '
            'You can either set `dropout` and `recurrent_dropout` to 0, '
            'or use the TensorFlow backend.')
        # Dropout is silently disabled on Theano after the warning.
        dropout = 0.
        recurrent_dropout = 0.

    cell = GRUCell(units,
                   activation=activation,
                   recurrent_activation=recurrent_activation,
                   use_bias=use_bias,
                   kernel_initializer=kernel_initializer,
                   recurrent_initializer=recurrent_initializer,
                   bias_initializer=bias_initializer,
                   kernel_regularizer=kernel_regularizer,
                   recurrent_regularizer=recurrent_regularizer,
                   bias_regularizer=bias_regularizer,
                   kernel_constraint=kernel_constraint,
                   recurrent_constraint=recurrent_constraint,
                   bias_constraint=bias_constraint,
                   dropout=dropout,
                   recurrent_dropout=recurrent_dropout,
                   implementation=implementation,
                   reset_after=reset_after)
    super(GRU, self).__init__(cell,
                              return_sequences=return_sequences,
                              return_state=return_state,
                              go_backwards=go_backwards,
                              stateful=stateful,
                              unroll=unroll,
                              **kwargs)
    # The activity regularizer lives on the layer, not on the cell.
    self.activity_regularizer = regularizers.get(activity_regularizer)
@classmethod
def from_config(cls, config):
    """Create a layer from its config dictionary.

    A legacy `implementation` value of 0 is replaced by 1 before the
    constructor is called.

    # Arguments
        config: dict of constructor keyword arguments. It is left
            unmodified.

    # Returns
        A new instance of `cls`.
    """
    if config.get('implementation') == 0:
        # Remap on a shallow copy so the caller's dict keeps its contents.
        config = dict(config, implementation=1)
    return cls(**config)
recurrent_activation: Activation function to use for the recurrent step (see [activations](../activations.md)). Default: hard sigmoid (`hard_sigmoid`). If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, used for the linear transformation of the recurrent state (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at initialization. Setting it to true will also force `bias_initializer="zeros"`. This is recommended in [Jozefowicz et al.] (http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to the `recurrent_kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the `kernel` weights matrix (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to the `recurrent_kernel` weights matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. implementation: Implementation mode, either 1 or 2. 
def __init__(self, units,
             activation='tanh',
             recurrent_activation='hard_sigmoid',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             bias_initializer='zeros',
             unit_forget_bias=True,
             kernel_regularizer=None,
             recurrent_regularizer=None,
             bias_regularizer=None,
             kernel_constraint=None,
             recurrent_constraint=None,
             bias_constraint=None,
             dropout=0.,
             recurrent_dropout=0.,
             implementation=1,
             **kwargs):
    super(LSTMCell, self).__init__(**kwargs)

    def clamp_rate(rate):
        # Dropout rates are clipped into the closed interval [0, 1].
        return min(1., max(0., rate))

    # Values stored exactly as given.
    self.units = units
    self.use_bias = use_bias
    self.unit_forget_bias = unit_forget_bias
    self.implementation = implementation

    # Identifiers resolved through the matching `get` helper; the calls are
    # kept in their original relative order.
    self.activation = activations.get(activation)
    self.recurrent_activation = activations.get(recurrent_activation)

    self.kernel_initializer = initializers.get(kernel_initializer)
    self.recurrent_initializer = initializers.get(recurrent_initializer)
    self.bias_initializer = initializers.get(bias_initializer)

    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)

    self.kernel_constraint = constraints.get(kernel_constraint)
    self.recurrent_constraint = constraints.get(recurrent_constraint)
    self.bias_constraint = constraints.get(bias_constraint)

    self.dropout = clamp_rate(dropout)
    self.recurrent_dropout = clamp_rate(recurrent_dropout)

    # Two states of `units` each (read as memory and carry in `call`);
    # the output has `units` entries.
    self.state_size = (self.units, self.units)
    self.output_size = self.units

    # Dropout masks start empty and are filled in by `call`.
    self._dropout_mask = None
    self._recurrent_dropout_mask = None
def call(self, inputs, states, training=None):
    """Compute one LSTM step.

    # Arguments
        inputs: input for the current step.
        states: sequence holding the previous memory state at index 0
            and the previous carry state at index 1.
        training: forwarded to `_generate_dropout_mask`.

    # Returns
        A pair `(h, [h, c])`: the new output and the new state list.
    """
    drop_inputs = 0 < self.dropout < 1
    drop_recurrent = 0 < self.recurrent_dropout < 1

    # Masks are generated only while the cached attribute is still None.
    if drop_inputs and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs),
            self.dropout,
            training=training,
            count=4)
    if drop_recurrent and self._recurrent_dropout_mask is None:
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(states[0]),
            self.recurrent_dropout,
            training=training,
            count=4)

    input_masks = self._dropout_mask
    recurrent_masks = self._recurrent_dropout_mask

    prev_h = states[0]  # previous memory state
    prev_c = states[1]  # previous carry state

    if self.implementation == 1:
        # Mode 1: four separate projections, in gate order i, f, c, o.
        if drop_inputs:
            gate_inputs = [inputs * input_masks[k] for k in range(4)]
        else:
            gate_inputs = [inputs] * 4

        kernels = (self.kernel_i, self.kernel_f,
                   self.kernel_c, self.kernel_o)
        projected = [K.dot(x, w) for x, w in zip(gate_inputs, kernels)]
        if self.use_bias:
            biases = (self.bias_i, self.bias_f, self.bias_c, self.bias_o)
            projected = [K.bias_add(p, b)
                         for p, b in zip(projected, biases)]
        x_i, x_f, x_c, x_o = projected

        if drop_recurrent:
            h_i, h_f, h_c, h_o = [prev_h * recurrent_masks[k]
                                  for k in range(4)]
        else:
            h_i = h_f = h_c = h_o = prev_h

        in_gate = self.recurrent_activation(
            x_i + K.dot(h_i, self.recurrent_kernel_i))
        forget_gate = self.recurrent_activation(
            x_f + K.dot(h_f, self.recurrent_kernel_f))
        carry = forget_gate * prev_c + in_gate * self.activation(
            x_c + K.dot(h_c, self.recurrent_kernel_c))
        out_gate = self.recurrent_activation(
            x_o + K.dot(h_o, self.recurrent_kernel_o))
    else:
        # Any other mode: one fused projection, using only the first mask
        # of each dropout list.
        if drop_inputs:
            inputs *= input_masks[0]
        fused = K.dot(inputs, self.kernel)
        if drop_recurrent:
            prev_h *= recurrent_masks[0]
        fused += K.dot(prev_h, self.recurrent_kernel)
        if self.use_bias:
            fused = K.bias_add(fused, self.bias)

        # Column blocks of the fused projection, in gate order i, f, c, o.
        pre_i, pre_f, pre_c, pre_o = (
            fused[:, :self.units],
            fused[:, self.units: 2 * self.units],
            fused[:, 2 * self.units: 3 * self.units],
            fused[:, 3 * self.units:],
        )

        in_gate = self.recurrent_activation(pre_i)
        forget_gate = self.recurrent_activation(pre_f)
        carry = forget_gate * prev_c + in_gate * self.activation(pre_c)
        out_gate = self.recurrent_activation(pre_o)

    h = out_gate * self.activation(carry)

    # Flag the output only when `training` was left unspecified and at
    # least one dropout rate is non-zero.
    if training is None and 0 < self.dropout + self.recurrent_dropout:
        h._uses_learning_phase = True
    return h, [h, carry]
"linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step (see [activations](../activations.md)). Default: hard sigmoid (`hard_sigmoid`). If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, used for the linear transformation of the recurrent state. (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at initialization. Setting it to true will also force `bias_initializer="zeros"`. This is recommended in [Jozefowicz et al.] (http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to the `recurrent_kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the `kernel` weights matrix (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to the `recurrent_kernel` weights matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. 
recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. implementation: Implementation mode, either 1 or 2. Mode 1 will structure its operations as a larger number of smaller dot products and additions, whereas mode 2 will batch them into fewer, larger operations. These modes will have different performance profiles on different hardware and for different applications. return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence. stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used. Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences. 
@interfaces.legacy_recurrent_support
def __init__(self, units,
             activation='tanh',
             recurrent_activation='hard_sigmoid',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             bias_initializer='zeros',
             unit_forget_bias=True,
             kernel_regularizer=None,
             recurrent_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             recurrent_constraint=None,
             bias_constraint=None,
             dropout=0.,
             recurrent_dropout=0.,
             implementation=1,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             unroll=False,
             **kwargs):
    # Cell-level arguments are used to build an `LSTMCell`; sequence-level
    # arguments (`return_sequences`, `return_state`, `go_backwards`,
    # `stateful`, `unroll`) and `**kwargs` go to the `RNN` constructor.
    if implementation == 0:
        warnings.warn('`implementation=0` has been deprecated, '
                      'and now defaults to `implementation=1`. '
                      'Please update your layer call.')
        # Do what the warning says. The cell only distinguishes
        # `implementation == 1` from everything else, so forwarding 0 would
        # select the batched code path instead of mode 1.
        implementation = 1
    if K.backend() == 'theano' and (dropout or recurrent_dropout):
        warnings.warn(
            'RNN dropout is no longer supported with the Theano backend '
            'due to technical limitations. '
            'You can either set `dropout` and `recurrent_dropout` to 0, '
            'or use the TensorFlow backend.')
        # Dropout is silently disabled on Theano after the warning.
        dropout = 0.
        recurrent_dropout = 0.

    cell = LSTMCell(units,
                    activation=activation,
                    recurrent_activation=recurrent_activation,
                    use_bias=use_bias,
                    kernel_initializer=kernel_initializer,
                    recurrent_initializer=recurrent_initializer,
                    unit_forget_bias=unit_forget_bias,
                    bias_initializer=bias_initializer,
                    kernel_regularizer=kernel_regularizer,
                    recurrent_regularizer=recurrent_regularizer,
                    bias_regularizer=bias_regularizer,
                    kernel_constraint=kernel_constraint,
                    recurrent_constraint=recurrent_constraint,
                    bias_constraint=bias_constraint,
                    dropout=dropout,
                    recurrent_dropout=recurrent_dropout,
                    implementation=implementation)
    super(LSTM, self).__init__(cell,
                               return_sequences=return_sequences,
                               return_state=return_state,
                               go_backwards=go_backwards,
                               stateful=stateful,
                               unroll=unroll,
                               **kwargs)
    # The activity regularizer lives on the layer, not on the cell.
    self.activity_regularizer = regularizers.get(activity_regularizer)
bias_constraint(self): return self.cell.bias_constraint @property def dropout(self): return self.cell.dropout @property def recurrent_dropout(self): return self.cell.recurrent_dropout @property def implementation(self): return self.cell.implementation def get_config(self): config = {'units': self.units, 'activation': activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), 'use_bias': self.use_bias, 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 'bias_initializer': initializers.serialize(self.bias_initializer), 'unit_forget_bias': self.unit_forget_bias, 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 'bias_constraint': constraints.serialize(self.bias_constraint), 'dropout': self.dropout, 'recurrent_dropout': self.recurrent_dropout, 'implementation': self.implementation} base_config = super(LSTM, self).get_config() del base_config['cell'] return dict(list(base_config.items()) + list(config.items())) @classmethod def from_config(cls, config): if 'implementation' in config and config['implementation'] == 0: config['implementation'] = 1 return cls(**config) def _generate_dropout_mask(ones, rate, training=None, count=1): def dropped_inputs(): return K.dropout(ones, rate) if count > 1: return [K.in_train_phase( dropped_inputs, ones, training=training) for _ in range(count)] return K.in_train_phase( dropped_inputs, ones, training=training) def _standardize_args(inputs, initial_state, constants, num_constants): """Standardize `__call__` to a 
class LocallyConnected1D(Layer):
    """Locally-connected layer for 1D inputs.

    The `LocallyConnected1D` layer works similarly to
    the `Conv1D` layer, except that weights are unshared,
    that is, a different set of filters is applied at each different patch
    of the input.

    # Example
    ```python
        # apply a unshared weight convolution 1d of length 3 to a sequence with
        # 10 timesteps, with 64 output filters
        model = Sequential()
        model.add(LocallyConnected1D(64, 3, input_shape=(10, 32)))
        # now model.output_shape == (None, 8, 64)
        # add a new conv1d on top
        model.add(LocallyConnected1D(32, 3))
        # now model.output_shape == (None, 6, 32)
    ```

    # Arguments
        filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution).
        kernel_size: An integer or tuple/list of a single integer,
            specifying the length of the 1D convolution window.
        strides: An integer or tuple/list of a single integer,
            specifying the stride length of the convolution.
        padding: Currently only supports `"valid"` (case-insensitive).
            `"same"` may be supported in the future.
        data_format: Stored after normalization through
            `K.normalize_data_format`. It is not forwarded to the
            backend by `call`.
        activation: Activation function to use
            (see [activations](../activations.md)).
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer (its "activation").
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to the kernel matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).

    # Input shape
        3D tensor with shape: `(batch_size, steps, input_dim)`

    # Output shape
        3D tensor with shape: `(batch_size, new_steps, filters)`
        `steps` value might have changed due to padding or strides.

    # Raises
        ValueError: if `padding` is anything other than `"valid"`, or
            (at build time) if the steps or feature axis of the input
            shape is undefined.
    """

    @interfaces.legacy_conv1d_support
    def __init__(self, filters,
                 kernel_size,
                 strides=1,
                 padding='valid',
                 data_format=None,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(LocallyConnected1D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = conv_utils.normalize_tuple(kernel_size, 1,
                                                      'kernel_size')
        self.strides = conv_utils.normalize_tuple(strides, 1, 'strides')
        self.padding = conv_utils.normalize_padding(padding)
        if self.padding != 'valid':
            raise ValueError('Invalid border mode for LocallyConnected1D '
                             '(only "valid" is supported): ' + padding)
        self.data_format = K.normalize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(ndim=3)

    def build(self, input_shape):
        """Creates one kernel (and optionally one bias) per output step.

        # Arguments
            input_shape: shape tuple `(batch_size, steps, input_dim)`.

        # Raises
            ValueError: if `steps` or `input_dim` is `None`.
        """
        steps = input_shape[1]
        input_dim = input_shape[2]
        if input_dim is None:
            # Build the message as one string; passing the shape as a second
            # exception argument made the error render as a tuple.
            raise ValueError('Axis 2 of input should be fully-defined. '
                             'Found shape: ' + str(input_shape))
        if steps is None:
            # Weights are allocated per output position, so the number of
            # steps has to be known when the weights are created.
            raise ValueError('The steps dimension (axis 1) of the inputs to '
                             'a LocallyConnected1D layer '
                             'should be fully-defined, but layer received '
                             'the inputs shape ' + str(input_shape))
        output_length = conv_utils.conv_output_length(steps,
                                                      self.kernel_size[0],
                                                      self.padding,
                                                      self.strides[0])
        self.kernel_shape = (output_length,
                             self.kernel_size[0] * input_dim,
                             self.filters)
        self.kernel = self.add_weight(
            shape=self.kernel_shape,
            initializer=self.kernel_initializer,
            name='kernel',
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(
                shape=(output_length, self.filters),
                initializer=self.bias_initializer,
                name='bias',
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint)
        else:
            self.bias = None
        # Pin the feature axis now that the kernel depends on its size.
        self.input_spec = InputSpec(ndim=3, axes={2: input_dim})
        self.built = True

    def compute_output_shape(self, input_shape):
        """Returns `(batch_size, new_steps, filters)` for `input_shape`."""
        length = conv_utils.conv_output_length(input_shape[1],
                                               self.kernel_size[0],
                                               self.padding,
                                               self.strides[0])
        return (input_shape[0], length, self.filters)

    def call(self, inputs):
        """Applies the unshared convolution, then bias and activation."""
        output = K.local_conv1d(inputs, self.kernel,
                                self.kernel_size, self.strides)
        if self.use_bias:
            output = K.bias_add(output, self.bias)
        if self.activation is not None:
            output = self.activation(output)
        return output

    def get_config(self):
        """Returns the constructor arguments merged into the base config."""
        config = {
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'strides': self.strides,
            'padding': self.padding,
            # `data_format` is a constructor argument, so it is serialized
            # to keep get_config / from_config a faithful round trip.
            'data_format': self.data_format,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer':
                initializers.serialize(self.kernel_initializer),
            'bias_initializer':
                initializers.serialize(self.bias_initializer),
            'kernel_regularizer':
                regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer':
                regularizers.serialize(self.bias_regularizer),
            'activity_regularizer':
                regularizers.serialize(self.activity_regularizer),
            'kernel_constraint':
                constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)
        }
        base_config = super(LocallyConnected1D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class LocallyConnected2D(Layer):
    """Locally-connected layer for 2D inputs.

    Behaves like `Conv2D`, except that the weights are unshared:
    every patch of the input gets its own set of filters.

    # Examples
    ```python
        # apply a 3x3 unshared weights convolution with 64 output filters
        # on a 32x32 image with `data_format="channels_last"`:
        model = Sequential()
        model.add(LocallyConnected2D(64, (3, 3), input_shape=(32, 32, 3)))
        # now model.output_shape == (None, 30, 30, 64)
        # notice that this layer will consume (30*30)*(3*3*3*64)
        # + (30*30)*64 parameters

        # add a 3x3 unshared weights convolution on top, with 32 output filters:
        model.add(LocallyConnected2D(32, (3, 3)))
        # now model.output_shape == (None, 28, 28, 32)
    ```

    # Arguments
        filters: Integer, number of output filters
            (the dimensionality of the output space).
        kernel_size: An integer or tuple/list of 2 integers giving the
            width and height of the 2D convolution window. A single
            integer applies to both spatial dimensions.
        strides: An integer or tuple/list of 2 integers giving the
            strides along the width and height. A single integer
            applies to both spatial dimensions.
        padding: Only `"valid"` is currently supported (case-insensitive).
        data_format: A string, one of `channels_last` (default)
            or `channels_first`. `channels_last` means inputs shaped
            `(batch, height, width, channels)`; `channels_first` means
            `(batch, channels, height, width)`. Defaults to the
            `image_data_format` value of `~/.keras/keras.json`,
            which is "channels_last" if never set.
        activation: Activation function to use
            (see [activations](../activations.md)).
            When omitted, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        kernel_regularizer: Regularizer applied to the `kernel`
            weights matrix (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer applied to the output of the
            layer (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint applied to the kernel matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint applied to the bias vector
            (see [constraints](../constraints.md)).

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, filters, new_rows, new_cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
        `rows` and `cols` values might have changed due to padding.
    """

    @interfaces.legacy_conv2d_support
    def __init__(self, filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format=None,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(LocallyConnected2D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = conv_utils.normalize_tuple(kernel_size, 2,
                                                      'kernel_size')
        self.strides = conv_utils.normalize_tuple(strides, 2, 'strides')
        self.padding = conv_utils.normalize_padding(padding)
        # Reject unsupported border modes before any further setup.
        if not self.padding == 'valid':
            raise ValueError('Invalid border mode for LocallyConnected2D '
                             '(only "valid" is supported): ' + padding)
        self.data_format = K.normalize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(ndim=4)

    def build(self, input_shape):
        """Allocates one kernel (and optional bias) per output position."""
        if self.data_format == 'channels_last':
            height, width = input_shape[1:-1]
            channels = input_shape[3]
        else:
            height, width = input_shape[2:]
            channels = input_shape[1]
        if height is None or width is None:
            raise ValueError('The spatial dimensions of the inputs to '
                             ' a LocallyConnected2D layer '
                             'should be fully-defined, but layer received '
                             'the inputs shape ' + str(input_shape))

        # Same output-length rule for both spatial axes.
        self.output_row = conv_utils.conv_output_length(
            height, self.kernel_size[0], self.padding, self.strides[0])
        self.output_col = conv_utils.conv_output_length(
            width, self.kernel_size[1], self.padding, self.strides[1])

        patch_size = self.kernel_size[0] * self.kernel_size[1] * channels
        self.kernel_shape = (self.output_row * self.output_col,
                             patch_size,
                             self.filters)
        self.kernel = self.add_weight(shape=self.kernel_shape,
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        self.bias = None
        if self.use_bias:
            bias_shape = (self.output_row, self.output_col, self.filters)
            self.bias = self.add_weight(shape=bias_shape,
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)

        # Pin the channel axis the kernel was sized for.
        channel_axis = 1 if self.data_format == 'channels_first' else -1
        self.input_spec = InputSpec(ndim=4, axes={channel_axis: channels})
        self.built = True

    def compute_output_shape(self, input_shape):
        """Returns the 4D output shape for the configured `data_format`."""
        if self.data_format == 'channels_first':
            rows, cols = input_shape[2], input_shape[3]
        elif self.data_format == 'channels_last':
            rows, cols = input_shape[1], input_shape[2]

        new_rows = conv_utils.conv_output_length(rows, self.kernel_size[0],
                                                 self.padding,
                                                 self.strides[0])
        new_cols = conv_utils.conv_output_length(cols, self.kernel_size[1],
                                                 self.padding,
                                                 self.strides[1])

        if self.data_format == 'channels_first':
            return (input_shape[0], self.filters, new_rows, new_cols)
        elif self.data_format == 'channels_last':
            return (input_shape[0], new_rows, new_cols, self.filters)

    def call(self, inputs):
        """Applies the unshared convolution, then bias and activation."""
        output_size = (self.output_row, self.output_col)
        result = K.local_conv2d(inputs,
                                self.kernel,
                                self.kernel_size,
                                self.strides,
                                output_size,
                                self.data_format)
        if self.use_bias:
            result = K.bias_add(result, self.bias,
                                data_format=self.data_format)
        return self.activation(result)

    def get_config(self):
        """Returns the layer settings merged into the base `Layer` config."""
        own_config = {
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'strides': self.strides,
            'padding': self.padding,
            'data_format': self.data_format,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer':
                initializers.serialize(self.kernel_initializer),
            'bias_initializer':
                initializers.serialize(self.bias_initializer),
            'kernel_regularizer':
                regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer':
                regularizers.serialize(self.bias_regularizer),
            'activity_regularizer':
                regularizers.serialize(self.activity_regularizer),
            'kernel_constraint':
                constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)
        }
        parent_config = super(LocallyConnected2D, self).get_config()
        merged = dict(parent_config)
        merged.update(own_config)
        return merged
""" def __init__(self, pool_size=2, strides=None, padding='valid', data_format='channels_last', **kwargs): super(_Pooling1D, self).__init__(**kwargs) if strides is None: strides = pool_size self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size') self.strides = conv_utils.normalize_tuple(strides, 1, 'strides') self.padding = conv_utils.normalize_padding(padding) self.data_format = K.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=3) def compute_output_shape(self, input_shape): if self.data_format == 'channels_first': steps = input_shape[2] features = input_shape[1] else: steps = input_shape[1] features = input_shape[2] length = conv_utils.conv_output_length(steps, self.pool_size[0], self.padding, self.strides[0]) if self.data_format == 'channels_first': return (input_shape[0], features, length) else: return (input_shape[0], length, features) def _pooling_function(self, inputs, pool_size, strides, padding, data_format): raise NotImplementedError def call(self, inputs): dummy_axis = 2 if self.data_format == 'channels_last' else 3 inputs = K.expand_dims(inputs, dummy_axis) # add dummy last dimension output = self._pooling_function(inputs=inputs, pool_size=self.pool_size + (1,), strides=self.strides + (1,), padding=self.padding, data_format=self.data_format) return K.squeeze(output, dummy_axis) # remove dummy last dimension def get_config(self): config = {'strides': self.strides, 'pool_size': self.pool_size, 'padding': self.padding, 'data_format': self.data_format} base_config = super(_Pooling1D, self).get_config() return dict(list(base_config.items()) + list(config.items())) class MaxPooling1D(_Pooling1D): """Max pooling operation for temporal data. # Arguments pool_size: Integer, size of the max pooling windows. strides: Integer, or None. Factor by which to downscale. E.g. 2 will halve the input. If None, it will default to `pool_size`. padding: One of `"valid"` or `"same"` (case-insensitive). 
data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape `(batch, steps, features)` while `channels_first` corresponds to inputs with shape `(batch, features, steps)`. # Input shape - If `data_format='channels_last'`: 3D tensor with shape: `(batch_size, steps, features)` - If `data_format='channels_first'`: 3D tensor with shape: `(batch_size, features, steps)` # Output shape - If `data_format='channels_last'`: 3D tensor with shape: `(batch_size, downsampled_steps, features)` - If `data_format='channels_first'`: 3D tensor with shape: `(batch_size, features, downsampled_steps)` """ @interfaces.legacy_pooling1d_support def __init__(self, pool_size=2, strides=None, padding='valid', data_format='channels_last', **kwargs): super(MaxPooling1D, self).__init__(pool_size, strides, padding, data_format, **kwargs) def _pooling_function(self, inputs, pool_size, strides, padding, data_format): output = K.pool2d(inputs, pool_size, strides, padding, data_format, pool_mode='max') return output class AveragePooling1D(_Pooling1D): """Average pooling for temporal data. # Arguments pool_size: Integer, size of the average pooling windows. strides: Integer, or None. Factor by which to downscale. E.g. 2 will halve the input. If None, it will default to `pool_size`. padding: One of `"valid"` or `"same"` (case-insensitive). data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape `(batch, steps, features)` while `channels_first` corresponds to inputs with shape `(batch, features, steps)`. 
class _Pooling2D(Layer):
    """Shared base for the 2D pooling layers.

    Normalizes the constructor arguments, computes output shapes and
    delegates the pooling itself to `_pooling_function`, which
    subclasses must override.
    """

    def __init__(self, pool_size=(2, 2), strides=None, padding='valid',
                 data_format=None, **kwargs):
        super(_Pooling2D, self).__init__(**kwargs)
        # Strides default to the window size (non-overlapping windows).
        effective_strides = pool_size if strides is None else strides
        self.pool_size = conv_utils.normalize_tuple(pool_size, 2, 'pool_size')
        self.strides = conv_utils.normalize_tuple(effective_strides, 2,
                                                  'strides')
        self.padding = conv_utils.normalize_padding(padding)
        self.data_format = K.normalize_data_format(data_format)
        self.input_spec = InputSpec(ndim=4)

    def compute_output_shape(self, input_shape):
        """Returns the pooled 4D shape for the configured `data_format`."""
        if self.data_format == 'channels_first':
            rows, cols = input_shape[2], input_shape[3]
        elif self.data_format == 'channels_last':
            rows, cols = input_shape[1], input_shape[2]

        pooled_rows = conv_utils.conv_output_length(rows, self.pool_size[0],
                                                    self.padding,
                                                    self.strides[0])
        pooled_cols = conv_utils.conv_output_length(cols, self.pool_size[1],
                                                    self.padding,
                                                    self.strides[1])

        if self.data_format == 'channels_first':
            return (input_shape[0], input_shape[1], pooled_rows, pooled_cols)
        elif self.data_format == 'channels_last':
            return (input_shape[0], pooled_rows, pooled_cols, input_shape[3])

    def _pooling_function(self, inputs, pool_size, strides,
                          padding, data_format):
        """Pooling primitive; must be overridden by subclasses."""
        raise NotImplementedError

    def call(self, inputs):
        """Runs the subclass pooling primitive with the stored settings."""
        return self._pooling_function(inputs=inputs,
                                      pool_size=self.pool_size,
                                      strides=self.strides,
                                      padding=self.padding,
                                      data_format=self.data_format)

    def get_config(self):
        """Returns the pooling settings merged into the base config."""
        merged = dict(super(_Pooling2D, self).get_config())
        merged.update({'pool_size': self.pool_size,
                       'padding': self.padding,
                       'strides': self.strides,
                       'data_format': self.data_format})
        return merged
class AveragePooling2D(_Pooling2D):
    """Average pooling operation for spatial data.

    # Arguments
        pool_size: integer or tuple of 2 integers,
            factors by which to downscale (vertical, horizontal).
            (2, 2) will halve the input in both spatial dimension.
            A single integer is used as the window length
            for both dimensions.
        strides: Integer, tuple of 2 integers, or None.
            Strides values. `None` means "same as `pool_size`".
        padding: One of `"valid"` or `"same"` (case-insensitive).
        data_format: A string, one of `channels_last` (default)
            or `channels_first`. `channels_last` means inputs shaped
            `(batch, height, width, channels)`; `channels_first` means
            `(batch, channels, height, width)`. Defaults to the
            `image_data_format` value of `~/.keras/keras.json`,
            which is "channels_last" if never set.

    # Input shape
        - If `data_format='channels_last'`:
            4D tensor with shape:
            `(batch_size, rows, cols, channels)`
        - If `data_format='channels_first'`:
            4D tensor with shape:
            `(batch_size, channels, rows, cols)`

    # Output shape
        - If `data_format='channels_last'`:
            4D tensor with shape:
            `(batch_size, pooled_rows, pooled_cols, channels)`
        - If `data_format='channels_first'`:
            4D tensor with shape:
            `(batch_size, channels, pooled_rows, pooled_cols)`
    """

    @interfaces.legacy_pooling2d_support
    def __init__(self, pool_size=(2, 2), strides=None, padding='valid',
                 data_format=None, **kwargs):
        super(AveragePooling2D, self).__init__(pool_size=pool_size,
                                               strides=strides,
                                               padding=padding,
                                               data_format=data_format,
                                               **kwargs)

    def _pooling_function(self, inputs, pool_size, strides,
                          padding, data_format):
        """Averages each pooling window via the backend `pool2d`."""
        return K.pool2d(inputs, pool_size, strides,
                        padding, data_format,
                        pool_mode='avg')
class MaxPooling3D(_Pooling3D):
    """Max pooling operation for 3D data (spatial or spatio-temporal).

    # Arguments
        pool_size: tuple of 3 integers,
            factors by which to downscale (dim1, dim2, dim3).
            (2, 2, 2) will halve the size of the 3D input in each dimension.
        strides: tuple of 3 integers, or None. Strides values.
        padding: One of `"valid"` or `"same"` (case-insensitive).
        data_format: A string, one of `channels_last` (default)
            or `channels_first`. `channels_last` means inputs shaped
            `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)`;
            `channels_first` means
            `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
            Defaults to the `image_data_format` value of
            `~/.keras/keras.json`, which is "channels_last" if never set.

    # Input shape
        - If `data_format='channels_last'`:
            5D tensor with shape:
            `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
        - If `data_format='channels_first'`:
            5D tensor with shape:
            `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`

    # Output shape
        - If `data_format='channels_last'`:
            5D tensor with shape:
            `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)`
        - If `data_format='channels_first'`:
            5D tensor with shape:
            `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)`
    """

    @interfaces.legacy_pooling3d_support
    def __init__(self, pool_size=(2, 2, 2), strides=None, padding='valid',
                 data_format=None, **kwargs):
        super(MaxPooling3D, self).__init__(pool_size=pool_size,
                                           strides=strides,
                                           padding=padding,
                                           data_format=data_format,
                                           **kwargs)

    def _pooling_function(self, inputs, pool_size, strides,
                          padding, data_format):
        """Takes the maximum of each pooling window via backend `pool3d`."""
        return K.pool3d(inputs, pool_size, strides,
                        padding, data_format,
                        pool_mode='max')
class GlobalAveragePooling1D(_GlobalPooling1D):
    """Global average pooling operation for temporal data.

    When a mask is supplied, masked steps are excluded from both the
    sum and the step count, so the result is the mean over unmasked
    steps only. The mask is consumed by this layer: `compute_mask`
    returns `None`.

    # Arguments
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, steps, features)` while `channels_first`
            corresponds to inputs with shape
            `(batch, features, steps)`.

    # Input shape
        - If `data_format='channels_last'`:
            3D tensor with shape:
            `(batch_size, steps, features)`
        - If `data_format='channels_first'`:
            3D tensor with shape:
            `(batch_size, features, steps)`

    # Output shape
        2D tensor with shape:
        `(batch_size, features)`
    """

    def __init__(self, data_format='channels_last', **kwargs):
        super(GlobalAveragePooling1D, self).__init__(data_format,
                                                     **kwargs)
        self.supports_masking = True

    def call(self, inputs, mask=None):
        """Averages `inputs` over the steps axis, honouring `mask`.

        # Arguments
            inputs: 3D tensor laid out according to `data_format`.
            mask: optional tensor with one entry per `(batch, step)`;
                steps whose entry is falsy are ignored.

        # Returns
            2D tensor `(batch_size, features)`.
        """
        channels_last = self.data_format == 'channels_last'
        steps_axis = 1 if channels_last else 2
        if mask is None:
            return K.mean(inputs, axis=steps_axis)

        mask = K.cast(mask, K.floatx())
        steps = K.int_shape(inputs)[steps_axis]
        # The singleton axis must sit where the features axis is, so the
        # mask broadcasts along features and lines up with the steps axis:
        # (batch, steps, 1) for channels_last, (batch, 1, steps) for
        # channels_first. Using (batch, steps, 1) for both layouts
        # misaligned the mask for channels_first inputs.
        if channels_last:
            broadcast_shape = [-1, steps, 1]
        else:
            broadcast_shape = [-1, 1, steps]
        mask = K.reshape(mask, broadcast_shape)
        masked_inputs = inputs * mask
        # NOTE(review): a sample whose steps are all masked divides by
        # zero here, as before; confirm whether callers rely on that.
        return (K.sum(masked_inputs, axis=steps_axis) /
                K.sum(mask, axis=steps_axis))

    def compute_mask(self, inputs, mask=None):
        """Returns `None`: the steps axis is reduced, so no mask remains."""
        return None
""" @interfaces.legacy_global_pooling_support def __init__(self, data_format=None, **kwargs): super(_GlobalPooling2D, self).__init__(**kwargs) self.data_format = K.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=4) def compute_output_shape(self, input_shape): if self.data_format == 'channels_last': return (input_shape[0], input_shape[3]) else: return (input_shape[0], input_shape[1]) def call(self, inputs): raise NotImplementedError def get_config(self): config = {'data_format': self.data_format} base_config = super(_GlobalPooling2D, self).get_config() return dict(list(base_config.items()) + list(config.items())) class GlobalAveragePooling2D(_GlobalPooling2D): """Global average pooling operation for spatial data. # Arguments data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape `(batch, height, width, channels)` while `channels_first` corresponds to inputs with shape `(batch, channels, height, width)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". # Input shape - If `data_format='channels_last'`: 4D tensor with shape: `(batch_size, rows, cols, channels)` - If `data_format='channels_first'`: 4D tensor with shape: `(batch_size, channels, rows, cols)` # Output shape 2D tensor with shape: `(batch_size, channels)` """ def call(self, inputs): if self.data_format == 'channels_last': return K.mean(inputs, axis=[1, 2]) else: return K.mean(inputs, axis=[2, 3]) class GlobalMaxPooling2D(_GlobalPooling2D): """Global max pooling operation for spatial data. # Arguments data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. 
`channels_last` corresponds to inputs with shape `(batch, height, width, channels)` while `channels_first` corresponds to inputs with shape `(batch, channels, height, width)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". # Input shape - If `data_format='channels_last'`: 4D tensor with shape: `(batch_size, rows, cols, channels)` - If `data_format='channels_first'`: 4D tensor with shape: `(batch_size, channels, rows, cols)` # Output shape 2D tensor with shape: `(batch_size, channels)` """ def call(self, inputs): if self.data_format == 'channels_last': return K.max(inputs, axis=[1, 2]) else: return K.max(inputs, axis=[2, 3]) class _GlobalPooling3D(Layer): """Abstract class for different global pooling 3D layers. """ @interfaces.legacy_global_pooling_support def __init__(self, data_format=None, **kwargs): super(_GlobalPooling3D, self).__init__(**kwargs) self.data_format = K.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=5) def compute_output_shape(self, input_shape): if self.data_format == 'channels_last': return (input_shape[0], input_shape[4]) else: return (input_shape[0], input_shape[1]) def call(self, inputs): raise NotImplementedError def get_config(self): config = {'data_format': self.data_format} base_config = super(_GlobalPooling3D, self).get_config() return dict(list(base_config.items()) + list(config.items())) class GlobalAveragePooling3D(_GlobalPooling3D): """Global Average pooling operation for 3D data. # Arguments data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while `channels_first` corresponds to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. 
It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". # Input shape - If `data_format='channels_last'`: 5D tensor with shape: `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - If `data_format='channels_first'`: 5D tensor with shape: `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` # Output shape 2D tensor with shape: `(batch_size, channels)` """ def call(self, inputs): if self.data_format == 'channels_last': return K.mean(inputs, axis=[1, 2, 3]) else: return K.mean(inputs, axis=[2, 3, 4]) class GlobalMaxPooling3D(_GlobalPooling3D): """Global Max pooling operation for 3D data. # Arguments data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while `channels_first` corresponds to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". 
# Input shape - If `data_format='channels_last'`: 5D tensor with shape: `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - If `data_format='channels_first'`: 5D tensor with shape: `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` # Output shape 2D tensor with shape: `(batch_size, channels)` """ def call(self, inputs): if self.data_format == 'channels_last': return K.max(inputs, axis=[1, 2, 3]) else: return K.max(inputs, axis=[2, 3, 4]) # Aliases AvgPool1D = AveragePooling1D MaxPool1D = MaxPooling1D AvgPool2D = AveragePooling2D MaxPool2D = MaxPooling2D AvgPool3D = AveragePooling3D MaxPool3D = MaxPooling3D GlobalMaxPool1D = GlobalMaxPooling1D GlobalMaxPool2D = GlobalMaxPooling2D GlobalMaxPool3D = GlobalMaxPooling3D GlobalAvgPool1D = GlobalAveragePooling1D GlobalAvgPool2D = GlobalAveragePooling2D GlobalAvgPool3D = GlobalAveragePooling3D Keras-2.2.4/keras/layers/__init__.py0000644000000000116100000000314513305602621017037 0ustar rooteng00000000000000from __future__ import absolute_import from ..utils.generic_utils import deserialize_keras_object from ..engine.base_layer import Layer from ..engine import Input from ..engine import InputLayer from ..engine.base_layer import InputSpec from .merge import * from .core import * from .convolutional import * from .pooling import * from .local import * from .recurrent import * from .cudnn_recurrent import * from .normalization import * from .embeddings import * from .noise import * from .advanced_activations import * from .wrappers import * from .convolutional_recurrent import * from ..legacy.layers import * def serialize(layer): """Serialize a layer. # Arguments layer: a Layer object. # Returns dictionary with config. """ return {'class_name': layer.__class__.__name__, 'config': layer.get_config()} def deserialize(config, custom_objects=None): """Instantiate a layer from a config dictionary. 
# Arguments config: dict of the form {'class_name': str, 'config': dict} custom_objects: dict mapping class names (or function names) of custom (non-Keras) objects to class/functions # Returns Layer instance (may be Model, Sequential, Layer...) """ from .. import models globs = globals() # All layers. globs['Model'] = models.Model globs['Sequential'] = models.Sequential return deserialize_keras_object(config, module_objects=globs, custom_objects=custom_objects, printable_module_name='layer') Keras-2.2.4/keras/layers/core.py0000644000000000116100000010631713354530144016241 0ustar rooteng00000000000000# -*- coding: utf-8 -*- """Core Keras layers. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import copy import types as python_types import warnings from .. import backend as K from .. import activations from .. import initializers from .. import regularizers from .. import constraints from ..engine.base_layer import InputSpec from ..engine.base_layer import Layer from ..utils.generic_utils import func_dump from ..utils.generic_utils import func_load from ..utils.generic_utils import deserialize_keras_object from ..utils.generic_utils import has_arg from ..utils import conv_utils from ..legacy import interfaces class Masking(Layer): """Masks a sequence by using a mask value to skip timesteps. For each timestep in the input tensor (dimension #1 in the tensor), if all values in the input tensor at that timestep are equal to `mask_value`, then the timestep will be masked (skipped) in all downstream layers (as long as they support masking). If any downstream layer does not support masking yet receives such an input mask, an exception will be raised. # Example Consider a Numpy data array `x` of shape `(samples, timesteps, features)`, to be fed to an LSTM layer. You want to mask timestep #3 and #5 because you lack data for these timesteps. 
You can: - set `x[:, 3, :] = 0.` and `x[:, 5, :] = 0.` - insert a `Masking` layer with `mask_value=0.` before the LSTM layer: ```python model = Sequential() model.add(Masking(mask_value=0., input_shape=(timesteps, features))) model.add(LSTM(32)) ``` """ def __init__(self, mask_value=0., **kwargs): super(Masking, self).__init__(**kwargs) self.supports_masking = True self.mask_value = mask_value def compute_mask(self, inputs, mask=None): output_mask = K.any(K.not_equal(inputs, self.mask_value), axis=-1) return output_mask def call(self, inputs): boolean_mask = K.any(K.not_equal(inputs, self.mask_value), axis=-1, keepdims=True) return inputs * K.cast(boolean_mask, K.dtype(inputs)) def get_config(self): config = {'mask_value': self.mask_value} base_config = super(Masking, self).get_config() return dict(list(base_config.items()) + list(config.items())) def compute_output_shape(self, input_shape): return input_shape class Dropout(Layer): """Applies Dropout to the input. Dropout consists in randomly setting a fraction `rate` of input units to 0 at each update during training time, which helps prevent overfitting. # Arguments rate: float between 0 and 1. Fraction of the input units to drop. noise_shape: 1D integer tensor representing the shape of the binary dropout mask that will be multiplied with the input. For instance, if your inputs have shape `(batch_size, timesteps, features)` and you want the dropout mask to be the same for all timesteps, you can use `noise_shape=(batch_size, 1, features)`. seed: A Python integer to use as random seed. 
# References - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting] (http://www.jmlr.org/papers/volume15/srivastava14a/srivastava14a.pdf) """ @interfaces.legacy_dropout_support def __init__(self, rate, noise_shape=None, seed=None, **kwargs): super(Dropout, self).__init__(**kwargs) self.rate = min(1., max(0., rate)) self.noise_shape = noise_shape self.seed = seed self.supports_masking = True def _get_noise_shape(self, inputs): if self.noise_shape is None: return self.noise_shape symbolic_shape = K.shape(inputs) noise_shape = [symbolic_shape[axis] if shape is None else shape for axis, shape in enumerate(self.noise_shape)] return tuple(noise_shape) def call(self, inputs, training=None): if 0. < self.rate < 1.: noise_shape = self._get_noise_shape(inputs) def dropped_inputs(): return K.dropout(inputs, self.rate, noise_shape, seed=self.seed) return K.in_train_phase(dropped_inputs, inputs, training=training) return inputs def get_config(self): config = {'rate': self.rate, 'noise_shape': self.noise_shape, 'seed': self.seed} base_config = super(Dropout, self).get_config() return dict(list(base_config.items()) + list(config.items())) def compute_output_shape(self, input_shape): return input_shape class SpatialDropout1D(Dropout): """Spatial 1D version of Dropout. This version performs the same function as Dropout, however it drops entire 1D feature maps instead of individual elements. If adjacent frames within feature maps are strongly correlated (as is normally the case in early convolution layers) then regular dropout will not regularize the activations and will otherwise just result in an effective learning rate decrease. In this case, SpatialDropout1D will help promote independence between feature maps and should be used instead. # Arguments rate: float between 0 and 1. Fraction of the input units to drop. 
# Input shape 3D tensor with shape: `(samples, timesteps, channels)` # Output shape Same as input # References - [Efficient Object Localization Using Convolutional Networks] (https://arxiv.org/abs/1411.4280) """ @interfaces.legacy_spatialdropout1d_support def __init__(self, rate, **kwargs): super(SpatialDropout1D, self).__init__(rate, **kwargs) self.input_spec = InputSpec(ndim=3) def _get_noise_shape(self, inputs): input_shape = K.shape(inputs) noise_shape = (input_shape[0], 1, input_shape[2]) return noise_shape class SpatialDropout2D(Dropout): """Spatial 2D version of Dropout. This version performs the same function as Dropout, however it drops entire 2D feature maps instead of individual elements. If adjacent pixels within feature maps are strongly correlated (as is normally the case in early convolution layers) then regular dropout will not regularize the activations and will otherwise just result in an effective learning rate decrease. In this case, SpatialDropout2D will help promote independence between feature maps and should be used instead. # Arguments rate: float between 0 and 1. Fraction of the input units to drop. data_format: 'channels_first' or 'channels_last'. In 'channels_first' mode, the channels dimension (the depth) is at index 1, in 'channels_last' mode is it at index 3. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". # Input shape 4D tensor with shape: `(samples, channels, rows, cols)` if data_format='channels_first' or 4D tensor with shape: `(samples, rows, cols, channels)` if data_format='channels_last'. 
# Output shape Same as input # References - [Efficient Object Localization Using Convolutional Networks] (https://arxiv.org/abs/1411.4280) """ @interfaces.legacy_spatialdropoutNd_support def __init__(self, rate, data_format=None, **kwargs): super(SpatialDropout2D, self).__init__(rate, **kwargs) self.data_format = K.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=4) def _get_noise_shape(self, inputs): input_shape = K.shape(inputs) if self.data_format == 'channels_first': noise_shape = (input_shape[0], input_shape[1], 1, 1) else: noise_shape = (input_shape[0], 1, 1, input_shape[3]) return noise_shape class SpatialDropout3D(Dropout): """Spatial 3D version of Dropout. This version performs the same function as Dropout, however it drops entire 3D feature maps instead of individual elements. If adjacent voxels within feature maps are strongly correlated (as is normally the case in early convolution layers) then regular dropout will not regularize the activations and will otherwise just result in an effective learning rate decrease. In this case, SpatialDropout3D will help promote independence between feature maps and should be used instead. # Arguments rate: float between 0 and 1. Fraction of the input units to drop. data_format: 'channels_first' or 'channels_last'. In 'channels_first' mode, the channels dimension (the depth) is at index 1, in 'channels_last' mode is it at index 4. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". # Input shape 5D tensor with shape: `(samples, channels, dim1, dim2, dim3)` if data_format='channels_first' or 5D tensor with shape: `(samples, dim1, dim2, dim3, channels)` if data_format='channels_last'. 
# Output shape Same as input # References - [Efficient Object Localization Using Convolutional Networks] (https://arxiv.org/abs/1411.4280) """ @interfaces.legacy_spatialdropoutNd_support def __init__(self, rate, data_format=None, **kwargs): super(SpatialDropout3D, self).__init__(rate, **kwargs) self.data_format = K.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=5) def _get_noise_shape(self, inputs): input_shape = K.shape(inputs) if self.data_format == 'channels_first': noise_shape = (input_shape[0], input_shape[1], 1, 1, 1) else: noise_shape = (input_shape[0], 1, 1, 1, input_shape[4]) return noise_shape class Activation(Layer): """Applies an activation function to an output. # Arguments activation: name of activation function to use (see: [activations](../activations.md)), or alternatively, a Theano or TensorFlow operation. # Input shape Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. # Output shape Same shape as input. """ def __init__(self, activation, **kwargs): super(Activation, self).__init__(**kwargs) self.supports_masking = True self.activation = activations.get(activation) def call(self, inputs): return self.activation(inputs) def get_config(self): config = {'activation': activations.serialize(self.activation)} base_config = super(Activation, self).get_config() return dict(list(base_config.items()) + list(config.items())) def compute_output_shape(self, input_shape): return input_shape class Reshape(Layer): """Reshapes an output to a certain shape. # Arguments target_shape: target shape. Tuple of integers. Does not include the batch axis. # Input shape Arbitrary, although all dimensions in the input shaped must be fixed. Use the keyword argument `input_shape` (tuple of integers, does not include the batch axis) when using this layer as the first layer in a model. 
# Output shape `(batch_size,) + target_shape` # Example ```python # as first layer in a Sequential model model = Sequential() model.add(Reshape((3, 4), input_shape=(12,))) # now: model.output_shape == (None, 3, 4) # note: `None` is the batch dimension # as intermediate layer in a Sequential model model.add(Reshape((6, 2))) # now: model.output_shape == (None, 6, 2) # also supports shape inference using `-1` as dimension model.add(Reshape((-1, 2, 2))) # now: model.output_shape == (None, 3, 2, 2) ``` """ def __init__(self, target_shape, **kwargs): super(Reshape, self).__init__(**kwargs) self.target_shape = tuple(target_shape) def _fix_unknown_dimension(self, input_shape, output_shape): """Finds and replaces a missing dimension in an output shape. This is a near direct port of the internal Numpy function `_fix_unknown_dimension` in `numpy/core/src/multiarray/shape.c` # Arguments input_shape: original shape of array being reshaped output_shape: target shape of the array, with at most a single -1 which indicates a dimension that should be derived from the input shape. # Returns The new output shape with a `-1` replaced with its computed value. # Raises ValueError: if `input_shape` and `output_shape` do not match. """ output_shape = list(output_shape) msg = 'total size of new array must be unchanged' known, unknown = 1, None for index, dim in enumerate(output_shape): if dim < 0: if unknown is None: unknown = index else: raise ValueError('Can only specify one unknown dimension.') else: known *= dim original = np.prod(input_shape, dtype=int) if unknown is not None: if known == 0 or original % known != 0: raise ValueError(msg) output_shape[unknown] = original // known elif original != known: raise ValueError(msg) return tuple(output_shape) def compute_output_shape(self, input_shape): if None in input_shape[1:]: # input shape (partially) unknown? 
replace -1's with None's return ((input_shape[0],) + tuple(s if s != -1 else None for s in self.target_shape)) else: # input shape known? then we can compute the output shape return (input_shape[0],) + self._fix_unknown_dimension( input_shape[1:], self.target_shape) def call(self, inputs): return K.reshape(inputs, (K.shape(inputs)[0],) + self.target_shape) def get_config(self): config = {'target_shape': self.target_shape} base_config = super(Reshape, self).get_config() return dict(list(base_config.items()) + list(config.items())) class Permute(Layer): """Permutes the dimensions of the input according to a given pattern. Useful for e.g. connecting RNNs and convnets together. # Example ```python model = Sequential() model.add(Permute((2, 1), input_shape=(10, 64))) # now: model.output_shape == (None, 64, 10) # note: `None` is the batch dimension ``` # Arguments dims: Tuple of integers. Permutation pattern, does not include the samples dimension. Indexing starts at 1. For instance, `(2, 1)` permutes the first and second dimension of the input. # Input shape Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. # Output shape Same as the input shape, but with the dimensions re-ordered according to the specified pattern. 
""" def __init__(self, dims, **kwargs): super(Permute, self).__init__(**kwargs) self.dims = tuple(dims) self.input_spec = InputSpec(ndim=len(self.dims) + 1) def compute_output_shape(self, input_shape): input_shape = list(input_shape) output_shape = copy.copy(input_shape) for i, dim in enumerate(self.dims): target_dim = input_shape[dim] output_shape[i + 1] = target_dim return tuple(output_shape) def call(self, inputs): return K.permute_dimensions(inputs, (0,) + self.dims) def get_config(self): config = {'dims': self.dims} base_config = super(Permute, self).get_config() return dict(list(base_config.items()) + list(config.items())) class Flatten(Layer): """Flattens the input. Does not affect the batch size. # Arguments data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. The purpose of this argument is to preserve weight ordering when switching a model from one data format to another. `channels_last` corresponds to inputs with shape `(batch, ..., channels)` while `channels_first` corresponds to inputs with shape `(batch, channels, ...)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". # Example ```python model = Sequential() model.add(Conv2D(64, (3, 3), input_shape=(3, 32, 32), padding='same',)) # now: model.output_shape == (None, 64, 32, 32) model.add(Flatten()) # now: model.output_shape == (None, 65536) ``` """ def __init__(self, data_format=None, **kwargs): super(Flatten, self).__init__(**kwargs) self.input_spec = InputSpec(min_ndim=3) self.data_format = K.normalize_data_format(data_format) def compute_output_shape(self, input_shape): if not all(input_shape[1:]): raise ValueError('The shape of the input to "Flatten" ' 'is not fully defined ' '(got ' + str(input_shape[1:]) + '. 
' 'Make sure to pass a complete "input_shape" ' 'or "batch_input_shape" argument to the first ' 'layer in your model.') return (input_shape[0], np.prod(input_shape[1:])) def call(self, inputs): if self.data_format == 'channels_first': # Ensure works for any dim permutation = [0] permutation.extend([i for i in range(2, K.ndim(inputs))]) permutation.append(1) inputs = K.permute_dimensions(inputs, permutation) return K.batch_flatten(inputs) def get_config(self): config = {'data_format': self.data_format} base_config = super(Flatten, self).get_config() return dict(list(base_config.items()) + list(config.items())) class RepeatVector(Layer): """Repeats the input n times. # Example ```python model = Sequential() model.add(Dense(32, input_dim=32)) # now: model.output_shape == (None, 32) # note: `None` is the batch dimension model.add(RepeatVector(3)) # now: model.output_shape == (None, 3, 32) ``` # Arguments n: integer, repetition factor. # Input shape 2D tensor of shape `(num_samples, features)`. # Output shape 3D tensor of shape `(num_samples, n, features)`. """ def __init__(self, n, **kwargs): super(RepeatVector, self).__init__(**kwargs) self.n = n self.input_spec = InputSpec(ndim=2) def compute_output_shape(self, input_shape): return (input_shape[0], self.n, input_shape[1]) def call(self, inputs): return K.repeat(inputs, self.n) def get_config(self): config = {'n': self.n} base_config = super(RepeatVector, self).get_config() return dict(list(base_config.items()) + list(config.items())) class Lambda(Layer): """Wraps arbitrary expression as a `Layer` object. 
# Examples ```python # add a x -> x^2 layer model.add(Lambda(lambda x: x ** 2)) ``` ```python # add a layer that returns the concatenation # of the positive part of the input and # the opposite of the negative part def antirectifier(x): x -= K.mean(x, axis=1, keepdims=True) x = K.l2_normalize(x, axis=1) pos = K.relu(x) neg = K.relu(-x) return K.concatenate([pos, neg], axis=1) def antirectifier_output_shape(input_shape): shape = list(input_shape) assert len(shape) == 2 # only valid for 2D tensors shape[-1] *= 2 return tuple(shape) model.add(Lambda(antirectifier, output_shape=antirectifier_output_shape)) ``` # Arguments function: The function to be evaluated. Takes input tensor as first argument. output_shape: Expected output shape from function. Only relevant when using Theano. Can be a tuple or function. If a tuple, it only specifies the first dimension onward; sample dimension is assumed either the same as the input: `output_shape = (input_shape[0], ) + output_shape` or, the input is `None` and the sample dimension is also `None`: `output_shape = (None, ) + output_shape` If a function, it specifies the entire shape as a function of the input shape: `output_shape = f(input_shape)` arguments: optional dictionary of keyword arguments to be passed to the function. # Input shape Arbitrary. Use the keyword argument input_shape (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. # Output shape Specified by `output_shape` argument (or auto-inferred when using TensorFlow or CNTK). 
""" @interfaces.legacy_lambda_support def __init__(self, function, output_shape=None, mask=None, arguments=None, **kwargs): super(Lambda, self).__init__(**kwargs) self.function = function self.arguments = arguments if arguments else {} if mask is not None: self.supports_masking = True self.mask = mask if output_shape is None: self._output_shape = None elif isinstance(output_shape, (tuple, list)): self._output_shape = tuple(output_shape) else: if not callable(output_shape): raise TypeError('In Lambda, `output_shape` ' 'must be a list, a tuple, or a function.') self._output_shape = output_shape def compute_output_shape(self, input_shape): if self._output_shape is None: # With TensorFlow or CNTK, we can infer the output shape directly: if K.backend() in ('tensorflow', 'cntk'): if isinstance(input_shape, list): xs = [K.placeholder(shape=shape) for shape in input_shape] x = self.call(xs) else: x = K.placeholder(shape=input_shape) x = self.call(x) if isinstance(x, list): return [K.int_shape(x_elem) for x_elem in x] else: return K.int_shape(x) # Otherwise, we default to the input shape. warnings.warn('`output_shape` argument not specified for layer {} ' 'and cannot be automatically inferred ' 'with the Theano backend. ' 'Defaulting to output shape `{}` ' '(same as input shape). ' 'If the expected output shape is different, ' 'specify it via the `output_shape` argument.' 
.format(self.name, input_shape)) return input_shape elif isinstance(self._output_shape, (tuple, list)): if isinstance(input_shape, list): num_samples = input_shape[0][0] else: num_samples = input_shape[0] if input_shape else None return (num_samples,) + tuple(self._output_shape) else: shape = self._output_shape(input_shape) if not isinstance(shape, (list, tuple)): raise ValueError('`output_shape` function must return a tuple or ' 'a list of tuples.') if isinstance(shape, list): if isinstance(shape[0], int) or shape[0] is None: shape = tuple(shape) return shape def call(self, inputs, mask=None): arguments = self.arguments if has_arg(self.function, 'mask'): arguments['mask'] = mask return self.function(inputs, **arguments) def compute_mask(self, inputs, mask=None): if callable(self.mask): return self.mask(inputs, mask) return self.mask def get_config(self): if isinstance(self.function, python_types.LambdaType): function = func_dump(self.function) function_type = 'lambda' else: function = self.function.__name__ function_type = 'function' if isinstance(self._output_shape, python_types.LambdaType): output_shape = func_dump(self._output_shape) output_shape_type = 'lambda' elif callable(self._output_shape): output_shape = self._output_shape.__name__ output_shape_type = 'function' else: output_shape = self._output_shape output_shape_type = 'raw' config = {'function': function, 'function_type': function_type, 'output_shape': output_shape, 'output_shape_type': output_shape_type, 'arguments': self.arguments} base_config = super(Lambda, self).get_config() return dict(list(base_config.items()) + list(config.items())) @classmethod def from_config(cls, config, custom_objects=None): config = config.copy() globs = globals() if custom_objects: globs = dict(list(globs.items()) + list(custom_objects.items())) function_type = config.pop('function_type') if function_type == 'function': # Simple lookup in custom objects function = deserialize_keras_object( config['function'], 
custom_objects=custom_objects, printable_module_name='function in Lambda layer') elif function_type == 'lambda': # Unsafe deserialization from bytecode function = func_load(config['function'], globs=globs) else: raise TypeError('Unknown function type:', function_type) output_shape_type = config.pop('output_shape_type') if output_shape_type == 'function': # Simple lookup in custom objects output_shape = deserialize_keras_object( config['output_shape'], custom_objects=custom_objects, printable_module_name='output_shape function in Lambda layer') elif output_shape_type == 'lambda': # Unsafe deserialization from bytecode output_shape = func_load(config['output_shape'], globs=globs) else: output_shape = config['output_shape'] # If arguments were numpy array, they have been saved as # list. We need to recover the ndarray if 'arguments' in config: for key in config['arguments']: if isinstance(config['arguments'][key], dict): arg_dict = config['arguments'][key] if 'type' in arg_dict and arg_dict['type'] == 'ndarray': # Overwrite the argument with its numpy translation config['arguments'][key] = np.array(arg_dict['value']) config['function'] = function config['output_shape'] = output_shape return cls(**config) class Dense(Layer): """Just your regular densely-connected NN layer. `Dense` implements the operation: `output = activation(dot(input, kernel) + bias)` where `activation` is the element-wise activation function passed as the `activation` argument, `kernel` is a weights matrix created by the layer, and `bias` is a bias vector created by the layer (only applicable if `use_bias` is `True`). Note: if the input to the layer has a rank greater than 2, then it is flattened prior to the initial dot product with `kernel`. 
# Example ```python # as first layer in a sequential model: model = Sequential() model.add(Dense(32, input_shape=(16,))) # now the model will take as input arrays of shape (*, 16) # and output arrays of shape (*, 32) # after the first layer, you don't need to specify # the size of the input anymore: model.add(Dense(32)) ``` # Arguments units: Positive integer, dimensionality of the output space. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the `kernel` weights matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). # Input shape nD tensor with shape: `(batch_size, ..., input_dim)`. The most common situation would be a 2D input with shape `(batch_size, input_dim)`. # Output shape nD tensor with shape: `(batch_size, ..., units)`. For instance, for a 2D input with shape `(batch_size, input_dim)`, the output would have shape `(batch_size, units)`. 
""" @interfaces.legacy_dense_support def __init__(self, units, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs): if 'input_shape' not in kwargs and 'input_dim' in kwargs: kwargs['input_shape'] = (kwargs.pop('input_dim'),) super(Dense, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) self.use_bias = use_bias self.kernel_initializer = initializers.get(kernel_initializer) self.bias_initializer = initializers.get(bias_initializer) self.kernel_regularizer = regularizers.get(kernel_regularizer) self.bias_regularizer = regularizers.get(bias_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) self.kernel_constraint = constraints.get(kernel_constraint) self.bias_constraint = constraints.get(bias_constraint) self.input_spec = InputSpec(min_ndim=2) self.supports_masking = True def build(self, input_shape): assert len(input_shape) >= 2 input_dim = input_shape[-1] self.kernel = self.add_weight(shape=(input_dim, self.units), initializer=self.kernel_initializer, name='kernel', regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) if self.use_bias: self.bias = self.add_weight(shape=(self.units,), initializer=self.bias_initializer, name='bias', regularizer=self.bias_regularizer, constraint=self.bias_constraint) else: self.bias = None self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) self.built = True def call(self, inputs): output = K.dot(inputs, self.kernel) if self.use_bias: output = K.bias_add(output, self.bias, data_format='channels_last') if self.activation is not None: output = self.activation(output) return output def compute_output_shape(self, input_shape): assert input_shape and len(input_shape) >= 2 assert input_shape[-1] output_shape = list(input_shape) output_shape[-1] = self.units return 
tuple(output_shape) def get_config(self): config = { 'units': self.units, 'activation': activations.serialize(self.activation), 'use_bias': self.use_bias, 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'bias_initializer': initializers.serialize(self.bias_initializer), 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'bias_constraint': constraints.serialize(self.bias_constraint) } base_config = super(Dense, self).get_config() return dict(list(base_config.items()) + list(config.items())) class ActivityRegularization(Layer): """Layer that applies an update to the cost function based input activity. # Arguments l1: L1 regularization factor (positive float). l2: L2 regularization factor (positive float). # Input shape Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. # Output shape Same shape as input. """ def __init__(self, l1=0., l2=0., **kwargs): super(ActivityRegularization, self).__init__(**kwargs) self.supports_masking = True self.l1 = l1 self.l2 = l2 self.activity_regularizer = regularizers.L1L2(l1=l1, l2=l2) def get_config(self): config = {'l1': self.l1, 'l2': self.l2} base_config = super(ActivityRegularization, self).get_config() return dict(list(base_config.items()) + list(config.items())) def compute_output_shape(self, input_shape): return input_shape Keras-2.2.4/keras/layers/embeddings.py0000644000000000116100000001474213354530144017412 0ustar rooteng00000000000000"""Embedding layer. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from .. import backend as K from .. import initializers from .. import regularizers from .. 
class Embedding(Layer):
    """Turns positive integers (indexes) into dense vectors of fixed size.

    For example, `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]`.

    This layer can only be used as the first layer in a model.

    # Example

    ```python
      model = Sequential()
      model.add(Embedding(1000, 64, input_length=10))
      # the model will take as input an integer matrix of size
      # (batch, input_length).
      # the largest integer (i.e. word index) in the input should be
      # no larger than 999 (vocabulary size).
      # now model.output_shape == (None, 10, 64), where None is the
      # batch dimension.

      input_array = np.random.randint(1000, size=(32, 10))

      model.compile('rmsprop', 'mse')
      output_array = model.predict(input_array)
      assert output_array.shape == (32, 10, 64)
    ```

    # Arguments
        input_dim: int > 0. Size of the vocabulary,
            i.e. maximum integer index + 1.
        output_dim: int >= 0. Dimension of the dense embedding.
        embeddings_initializer: Initializer for the `embeddings` matrix
            (see [initializers](../initializers.md)).
        embeddings_regularizer: Regularizer function applied to
            the `embeddings` matrix
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer
            (see [regularizer](../regularizers.md)).
        embeddings_constraint: Constraint function applied to
            the `embeddings` matrix
            (see [constraints](../constraints.md)).
        mask_zero: Whether or not the input value 0 is a special "padding"
            value that should be masked out.
            This is useful when using [recurrent layers](recurrent.md)
            which may take variable length input.
            If this is `True` then all subsequent layers
            in the model need to support masking or an exception will be
            raised. If mask_zero is set to True, as a consequence,
            index 0 cannot be used in the vocabulary
            (input_dim should equal size of vocabulary + 1).
        input_length: Length of input sequences, when it is constant.
            This argument is required if you are going to connect
            `Flatten` then `Dense` layers downstream
            (without it, the shape of the dense outputs cannot be
            computed).

    # Input shape
        2D tensor with shape: `(batch_size, sequence_length)`.

    # Output shape
        3D tensor with shape:
        `(batch_size, sequence_length, output_dim)`.

    # References
        - [A Theoretically Grounded Application of Dropout in
          Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
    """

    @interfaces.legacy_embedding_support
    def __init__(self, input_dim, output_dim,
                 embeddings_initializer='uniform',
                 embeddings_regularizer=None,
                 activity_regularizer=None,
                 embeddings_constraint=None,
                 mask_zero=False,
                 input_length=None,
                 **kwargs):
        # Derive an input shape from `input_length` unless the caller
        # supplied one explicitly.
        if 'input_shape' not in kwargs:
            if input_length:
                kwargs['input_shape'] = (input_length,)
            else:
                kwargs['input_shape'] = (None,)
        super(Embedding, self).__init__(**kwargs)

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.embeddings_initializer = initializers.get(embeddings_initializer)
        self.embeddings_regularizer = regularizers.get(embeddings_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.embeddings_constraint = constraints.get(embeddings_constraint)
        self.mask_zero = mask_zero
        self.supports_masking = mask_zero
        self.input_length = input_length

    def build(self, input_shape):
        """Creates the `(input_dim, output_dim)` embeddings weight."""
        self.embeddings = self.add_weight(
            shape=(self.input_dim, self.output_dim),
            initializer=self.embeddings_initializer,
            name='embeddings',
            regularizer=self.embeddings_regularizer,
            constraint=self.embeddings_constraint,
            dtype=self.dtype)
        self.built = True

    def compute_mask(self, inputs, mask=None):
        """Returns `inputs != 0` when `mask_zero` is set, else `None`."""
        if not self.mask_zero:
            return None
        return K.not_equal(inputs, 0)

    def compute_output_shape(self, input_shape):
        """Computes the output shape, validating it against `input_length`.

        # Arguments
            input_shape: Shape tuple of the input, batch axis included.

        # Returns
            `input_shape` with `output_dim` appended. Entries of
            `input_length` that are `None` are filled in from
            `input_shape`.

        # Raises
            ValueError: If `input_length` has the wrong number of entries
                or disagrees with a known dimension of `input_shape`.
        """
        if self.input_length is None:
            return input_shape + (self.output_dim,)

        # input_length can be tuple if input is 3D or higher.
        # Work on a copy: `to_list` may hand back the very list stored on
        # `self.input_length`, and filling its `None` entries in place
        # would pin the layer to the first shape it ever sees.
        in_lens = list(to_list(self.input_length, allow_tuple=True))
        if len(in_lens) != len(input_shape) - 1:
            raise ValueError(
                '"input_length" is %s, but received input has shape %s' %
                (str(self.input_length), str(input_shape)))
        for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])):
            if s1 is not None and s2 is not None and s1 != s2:
                raise ValueError(
                    '"input_length" is %s, but received input has shape %s' %
                    (str(self.input_length), str(input_shape)))
            elif s1 is None:
                in_lens[i] = s2
        return (input_shape[0],) + tuple(in_lens) + (self.output_dim,)

    def call(self, inputs):
        """Looks up the embedding rows indexed by `inputs`."""
        # Indices are cast to int32 before the gather when needed.
        if K.dtype(inputs) != 'int32':
            inputs = K.cast(inputs, 'int32')
        return K.gather(self.embeddings, inputs)

    def get_config(self):
        config = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'embeddings_initializer':
                initializers.serialize(self.embeddings_initializer),
            'embeddings_regularizer':
                regularizers.serialize(self.embeddings_regularizer),
            'activity_regularizer':
                regularizers.serialize(self.activity_regularizer),
            'embeddings_constraint':
                constraints.serialize(self.embeddings_constraint),
            'mask_zero': self.mask_zero,
            'input_length': self.input_length,
        }
        base_config = super(Embedding, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
@property
def activity_regularizer(self):
    """Activity regularizer of the wrapped layer, or `None` if it has none."""
    wrapped = self.layer
    if not hasattr(wrapped, 'activity_regularizer'):
        return None
    return wrapped.activity_regularizer
class TimeDistributed(Wrapper):
    """This wrapper applies a layer to every temporal slice of an input.

    The input should be at least 3D, and the dimension of index one
    will be considered to be the temporal dimension.

    Consider a batch of 32 samples,
    where each sample is a sequence of 10 vectors of 16 dimensions.
    The batch input shape of the layer is then `(32, 10, 16)`,
    and the `input_shape`, not including the samples dimension, is
    `(10, 16)`.

    You can then use `TimeDistributed` to apply a `Dense` layer
    to each of the 10 timesteps, independently:

    ```python
        # as the first layer in a model
        model = Sequential()
        model.add(TimeDistributed(Dense(8), input_shape=(10, 16)))
        # now model.output_shape == (None, 10, 8)
    ```

    The output will then have shape `(32, 10, 8)`.

    In subsequent layers, there is no need for the `input_shape`:

    ```python
        model.add(TimeDistributed(Dense(32)))
        # now model.output_shape == (None, 10, 32)
    ```

    The output will then have shape `(32, 10, 32)`.

    `TimeDistributed` can be used with arbitrary layers, not just `Dense`,
    for instance with a `Conv2D` layer:

    ```python
        model = Sequential()
        model.add(TimeDistributed(Conv2D(64, (3, 3)),
                                  input_shape=(10, 299, 299, 3)))
    ```

    # Arguments
        layer: a layer instance.
    """

    def __init__(self, layer, **kwargs):
        super(TimeDistributed, self).__init__(layer, **kwargs)
        self.supports_masking = True

    def _get_shape_tuple(self, init_tuple, tensor, start_idx, int_shape=None):
        """Finds non-specific dimensions in the static shapes
        and replaces them by the corresponding dynamic shapes of the tensor.

        # Arguments
            init_tuple: a tuple, the first part of the output shape
            tensor: the tensor from which to get the (static and dynamic)
                shapes as the last part of the output shape
            start_idx: int, which indicate the first dimension to take from
                the static shape of the tensor
            int_shape: an alternative static shape to take as the last part
                of the output shape

        # Returns
            The new int_shape with the first part from init_tuple
            and the last part from either `int_shape` (if provided)
            or K.int_shape(tensor), where every `None` is replaced by
            the corresponding dimension from K.shape(tensor)
        """
        # replace all None in int_shape by K.shape
        if int_shape is None:
            int_shape = K.int_shape(tensor)[start_idx:]
        if not any(not s for s in int_shape):
            # Fully static: no dynamic shape op is needed.
            return init_tuple + tuple(int_shape)
        tensor_shape = K.shape(tensor)
        int_shape = list(int_shape)
        for i, s in enumerate(int_shape):
            if not s:
                int_shape[i] = tensor_shape[start_idx + i]
        return init_tuple + tuple(int_shape)

    def build(self, input_shape):
        """Builds the wrapped layer on the input shape minus the time axis."""
        assert len(input_shape) >= 3
        self.input_spec = InputSpec(shape=input_shape)
        # Drop axis 1 (time): the inner layer only sees one slice.
        child_input_shape = (input_shape[0],) + input_shape[2:]
        if not self.layer.built:
            self.layer.build(child_input_shape)
            self.layer.built = True
        super(TimeDistributed, self).build()

    def compute_output_shape(self, input_shape):
        """Re-inserts the time axis into the wrapped layer's output shape."""
        child_input_shape = (input_shape[0],) + input_shape[2:]
        child_output_shape = self.layer.compute_output_shape(child_input_shape)
        timesteps = input_shape[1]
        return (child_output_shape[0], timesteps) + child_output_shape[1:]

    def call(self, inputs, training=None, mask=None):
        """Applies the wrapped layer to every slice along axis 1.

        # Arguments
            inputs: Input tensor.
            training: Forwarded to the wrapped layer's `call` when that
                method accepts a `training` argument.
            mask: Input mask. In the reshape-based path it is reshaped and
                forwarded when the wrapped layer's `call` accepts `mask`.

        # Returns
            The output tensor, with the time axis restored.
        """
        kwargs = {}
        if has_arg(self.layer.call, 'training'):
            kwargs['training'] = training
        uses_learning_phase = False

        input_shape = K.int_shape(inputs)
        if input_shape[0]:
            # batch size matters, use rnn-based implementation

            # A one-element list lets the nested `step` record the flag on
            # both Python 2 and 3. The previous `global` declaration never
            # updated the local above and could raise NameError.
            learning_phase_seen = [False]

            def step(x, _):
                output = self.layer.call(x, **kwargs)
                if getattr(output, '_uses_learning_phase', False):
                    learning_phase_seen[0] = True
                return output, []

            _, outputs, _ = K.rnn(step, inputs,
                                  initial_states=[],
                                  input_length=input_shape[1],
                                  unroll=False)
            y = outputs
            uses_learning_phase = learning_phase_seen[0]
        else:
            # No batch size specified, therefore the layer will be able
            # to process batches of any size.
            # We can go with reshape-based implementation for performance.
            input_length = input_shape[1]
            if not input_length:
                input_length = K.shape(inputs)[1]
            inner_input_shape = self._get_shape_tuple((-1,), inputs, 2)
            # Shape: (num_samples * timesteps, ...). And track the
            # transformation in self._input_map.
            input_uid = object_list_uid(inputs)
            inputs = K.reshape(inputs, inner_input_shape)
            self._input_map[input_uid] = inputs
            # (num_samples * timesteps, ...)
            if has_arg(self.layer.call, 'mask') and mask is not None:
                inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
                kwargs['mask'] = K.reshape(mask, inner_mask_shape)
            y = self.layer.call(inputs, **kwargs)
            if hasattr(y, '_uses_learning_phase'):
                uses_learning_phase = y._uses_learning_phase
            # Shape: (num_samples, timesteps, ...)
            output_shape = self.compute_output_shape(input_shape)
            output_shape = self._get_shape_tuple(
                (-1, input_length), y, 1, output_shape[2:])
            y = K.reshape(y, output_shape)

        # Apply activity regularizer if any:
        if (hasattr(self.layer, 'activity_regularizer') and
                self.layer.activity_regularizer is not None):
            regularization_loss = self.layer.activity_regularizer(y)
            self.add_loss(regularization_loss, inputs)

        if uses_learning_phase:
            y._uses_learning_phase = True
        return y

    def compute_mask(self, inputs, mask=None):
        """Computes an output mask tensor for the TimeDistributed layer
        based on the inputs, mask, and the inner layer.

        If batch size is specified:
        Simply return the input `mask`. (An rnn-based implementation with
        more than one rnn inputs is required but not supported in Keras yet.)

        Otherwise we call `compute_mask` of the inner layer at each time step.
        If the output mask at each time step is not `None`:
        (E.g., inner layer is Masking or RNN)
        Concatenate all of them and return the concatenation.
        If the output mask at each time step is `None` and
        the input mask is not `None`:
        (E.g., inner layer is Dense)
        Reduce the input_mask to 2 dimensions and return it.
        Otherwise (both the output mask and the input mask are `None`):
        (E.g., `mask` is not used at all)
        Return `None`.

        # Arguments
            inputs: Tensor
            mask: Tensor

        # Returns
            None or a tensor
        """
        # cases need to call the layer.compute_mask when input_mask is None:
        # Masking layer and Embedding layer with mask_zero
        input_shape = K.int_shape(inputs)
        if input_shape[0]:
            # batch size matters, we currently do not handle mask explicitly
            return mask
        inner_mask = mask
        if inner_mask is not None:
            inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
            inner_mask = K.reshape(inner_mask, inner_mask_shape)
        # The reshaped inputs were recorded by `call` under this uid.
        input_uid = object_list_uid(inputs)
        inner_inputs = self._input_map[input_uid]
        output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
        if output_mask is None:
            if mask is None:
                return None
            # input_mask is not None, and output_mask is None:
            # we should return a not-None mask
            output_mask = mask
            for _ in range(2, len(K.int_shape(mask))):
                output_mask = K.any(output_mask, axis=-1)
        else:
            # output_mask is not None. We need to reshape it
            input_length = input_shape[1]
            if not input_length:
                input_length = K.shape(inputs)[1]
            output_mask_int_shape = K.int_shape(output_mask)
            if output_mask_int_shape is None:
                # if the output_mask does not have a static shape,
                # its shape must be the same as mask's
                if mask is not None:
                    output_mask_int_shape = K.int_shape(mask)
                else:
                    # Shape inference lives on the layer, not on the
                    # backend module, so call it on `self`.
                    output_mask_int_shape = (
                        self.compute_output_shape(input_shape)[:-1])
            output_mask_shape = self._get_shape_tuple(
                (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
            output_mask = K.reshape(output_mask, output_mask_shape)
        return output_mask
def compute_output_shape(self, input_shape):
    """Computes the output shape(s) of the bidirectional wrapper.

    # Arguments
        input_shape: Input shape, passed to the forward layer's
            `compute_output_shape`.

    # Returns
        The merged output shape: last dimension doubled for `'concat'`,
        a list of two shapes when `merge_mode` is `None`, unchanged
        otherwise. When `return_state` is set, a list made of the
        output shape(s) followed by the state shapes taken twice.
    """
    forward_shape = self.forward_layer.compute_output_shape(input_shape)
    with_state = self.return_state
    if with_state:
        # First entry is the output shape, the rest are state shapes.
        state_shapes = forward_shape[1:]
        main_shape = forward_shape[0]
    else:
        main_shape = forward_shape

    mode = self.merge_mode
    if mode == 'concat':
        merged = tuple(main_shape[:-1]) + (main_shape[-1] * 2,)
    elif mode is None:
        merged = [main_shape, copy.copy(main_shape)]
    else:
        merged = main_shape

    if not with_state:
        return merged
    # With no merging `merged` is already a list of two shapes.
    leading = merged if mode is None else [merged]
    return leading + state_shapes + copy.copy(state_shapes)
def call(self,
         inputs,
         mask=None,
         training=None,
         initial_state=None,
         constants=None):
    """Runs the forward and backward layers and merges their outputs.

    # Arguments
        inputs: Input tensor, or a list holding the input tensor followed
            by the initial states and constants when those are given.
        mask: Forwarded when the wrapped layer's `call` accepts `mask`.
        training: Forwarded when the wrapped layer's `call` accepts
            `training`.
        initial_state: Initial states; only their count is used here to
            split `inputs` between the two directions.
        constants: Forwarded when the wrapped layer's `call` accepts
            `constants`.

    # Returns
        The merged output (a list of two outputs when `merge_mode` is
        `None`), followed by the states of both directions when
        `return_state` is set.

    # Raises
        ValueError: If `merge_mode` is not a recognized value.
    """
    # Forward only the keyword arguments the wrapped layer understands.
    kwargs = {}
    for arg_name, arg_value in (('training', training),
                                ('mask', mask),
                                ('constants', constants)):
        if has_arg(self.layer.call, arg_name):
            kwargs[arg_name] = arg_value

    if initial_state is not None and has_arg(self.layer.call,
                                             'initial_state'):
        # `inputs` is laid out as
        # [x, forward states..., backward states..., constants...].
        split_at = len(initial_state) // 2 + 1
        forward_state = inputs[1:split_at]
        forward_inputs = [inputs[0]]
        forward_inputs += forward_state
        backward_inputs = [inputs[0]]
        if self._num_constants is None:
            backward_state = inputs[split_at:]
            backward_inputs += backward_state
        else:
            backward_state = inputs[split_at:-self._num_constants]
            backward_inputs += backward_state
            # Both directions receive the same trailing constants.
            forward_inputs += inputs[-self._num_constants:]
            backward_inputs += inputs[-self._num_constants:]
        y = self.forward_layer.call(forward_inputs,
                                    initial_state=forward_state, **kwargs)
        y_rev = self.backward_layer.call(backward_inputs,
                                         initial_state=backward_state,
                                         **kwargs)
    else:
        y = self.forward_layer.call(inputs, **kwargs)
        y_rev = self.backward_layer.call(inputs, **kwargs)

    if self.return_state:
        # Peel the states off; only the main outputs get merged.
        states = y[1:] + y_rev[1:]
        y = y[0]
        y_rev = y_rev[0]

    if self.return_sequences:
        # Reverse the backward output along axis 1 before merging.
        y_rev = K.reverse(y_rev, 1)

    mode = self.merge_mode
    if mode == 'concat':
        output = K.concatenate([y, y_rev])
    elif mode == 'sum':
        output = y + y_rev
    elif mode == 'ave':
        output = (y + y_rev) / 2
    elif mode == 'mul':
        output = y * y_rev
    elif mode is None:
        output = [y, y_rev]
    else:
        raise ValueError('Unrecognized value for argument '
                         'merge_mode: %s' % (self.merge_mode))

    # Properly set learning phase
    needs_learning_phase = (getattr(y, '_uses_learning_phase', False) or
                            getattr(y_rev, '_uses_learning_phase', False))
    if needs_learning_phase:
        if self.merge_mode is None:
            for out in output:
                out._uses_learning_phase = True
        else:
            output._uses_learning_phase = True

    if not self.return_state:
        return output
    if self.merge_mode is None:
        return output + states
    return [output] + states
def compute_mask(self, inputs, mask):
    """Computes the output mask(s) of the bidirectional wrapper.

    # Arguments
        inputs: Unused.
        mask: Input mask, or a list whose first entry is the input mask.

    # Returns
        The input mask when `return_sequences` is set, `None` otherwise;
        given twice in a list when `merge_mode` is falsy. When
        `return_state` is set, a list with one trailing `None` per
        forward-layer state, taken twice.
    """
    if isinstance(mask, list):
        mask = mask[0]

    # Only a sequence output carries the input mask through.
    single_mask = mask if self.return_sequences else None
    if self.merge_mode:
        output_mask = single_mask
    else:
        output_mask = [single_mask, single_mask]

    if not self.return_state:
        return output_mask

    # States are never masked: one None per state, for both directions.
    state_mask = [None for _ in self.forward_layer.states]
    if not isinstance(output_mask, list):
        output_mask = [output_mask]
    return output_mask + state_mask * 2
class GaussianNoise(Layer):
    """Apply additive zero-centered Gaussian noise.

    This is useful to mitigate overfitting
    (you could see it as a form of random data augmentation).
    Gaussian Noise (GS) is a natural choice as corruption process
    for real valued inputs.

    As it is a regularization layer, it is only active at training time.

    # Arguments
        stddev: float, standard deviation of the noise distribution.

    # Input shape
        Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.

    # Output shape
        Same shape as input.
    """

    @interfaces.legacy_gaussiannoise_support
    def __init__(self, stddev, **kwargs):
        super(GaussianNoise, self).__init__(**kwargs)
        self.supports_masking = True
        self.stddev = stddev

    def call(self, inputs, training=None):
        def _perturbed():
            # Zero-mean noise with the same shape as the inputs.
            noise = K.random_normal(shape=K.shape(inputs),
                                    mean=0.,
                                    stddev=self.stddev)
            return inputs + noise

        # Outside the training phase the inputs are used unchanged.
        return K.in_train_phase(_perturbed, inputs, training=training)

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        merged = dict(super(GaussianNoise, self).get_config())
        merged.update({'stddev': self.stddev})
        return merged
class AlphaDropout(Layer):
    """Applies Alpha Dropout to the input.

    Alpha Dropout is a `Dropout` that keeps mean and variance of inputs
    to their original values, in order to ensure the self-normalizing
    property even after this dropout.
    Alpha Dropout fits well to Scaled Exponential Linear Units
    by randomly setting activations to the negative saturation value.

    # Arguments
        rate: float, drop probability (as with `Dropout`).
        noise_shape: Shape of the random keep/drop mask. When it is
            `None` (or otherwise falsy) the dynamic shape of the input
            is used.
        seed: A Python integer to use as random seed.

    # Input shape
        Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.

    # Output shape
        Same shape as input.

    # References
        - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
    """

    def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
        super(AlphaDropout, self).__init__(**kwargs)
        self.rate = rate
        self.noise_shape = noise_shape
        self.seed = seed
        self.supports_masking = True

    def _get_noise_shape(self, inputs):
        """Returns `noise_shape` if set, else the dynamic input shape."""
        return self.noise_shape if self.noise_shape else K.shape(inputs)

    def call(self, inputs, training=None):
        # Rates outside the open interval (0, 1) leave inputs untouched.
        if 0. < self.rate < 1.:
            noise_shape = self._get_noise_shape(inputs)

            def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed):
                alpha = 1.6732632423543772848170429916717
                scale = 1.0507009873554804934193349852946
                # Value that dropped activations are set to.
                alpha_p = -alpha * scale

                kept_idx = K.greater_equal(
                    K.random_uniform(noise_shape, seed=seed), rate)
                kept_idx = K.cast(kept_idx, K.floatx())

                # Get affine transformation params
                a = ((1 - rate) * (1 + rate * alpha_p ** 2)) ** -0.5
                b = -a * alpha_p * rate

                # Apply mask
                x = inputs * kept_idx + alpha_p * (1 - kept_idx)

                # Do affine transformation
                return a * x + b

            return K.in_train_phase(dropped_inputs, inputs, training=training)
        return inputs

    def get_config(self):
        # Every constructor argument is serialized so that rebuilding the
        # layer from its config keeps `noise_shape` and `seed`.
        config = {'rate': self.rate,
                  'noise_shape': self.noise_shape,
                  'seed': self.seed}
        base_config = super(AlphaDropout, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        return input_shape
class PReLU(Layer):
    """Parametric Rectified Linear Unit.

    It follows:
    `f(x) = alpha * x for x < 0`,
    `f(x) = x for x >= 0`,
    where `alpha` is a learned array with the same shape as x.

    # Input shape
        Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.

    # Output shape
        Same shape as the input.

    # Arguments
        alpha_initializer: initializer function for the weights.
        alpha_regularizer: regularizer for the weights.
        alpha_constraint: constraint for the weights.
        shared_axes: the axes along which to share learnable
            parameters for the activation function.
            For example, if the incoming feature maps
            are from a 2D convolution
            with output shape `(batch, height, width, channels)`,
            and you wish to share parameters across space
            so that each filter only has one set of parameters,
            set `shared_axes=[1, 2]`.

    # References
        - [Delving Deep into Rectifiers: Surpassing Human-Level Performance on
           ImageNet Classification](https://arxiv.org/abs/1502.01852)
    """

    @interfaces.legacy_prelu_support
    def __init__(self, alpha_initializer='zeros',
                 alpha_regularizer=None,
                 alpha_constraint=None,
                 shared_axes=None,
                 **kwargs):
        super(PReLU, self).__init__(**kwargs)
        self.supports_masking = True
        self.alpha_initializer = initializers.get(alpha_initializer)
        self.alpha_regularizer = regularizers.get(alpha_regularizer)
        self.alpha_constraint = constraints.get(alpha_constraint)
        # Normalize to a list, keeping None as "no shared axes".
        if shared_axes is not None:
            self.shared_axes = to_list(shared_axes, allow_tuple=True)
        else:
            self.shared_axes = None

    def build(self, input_shape):
        # One alpha per input element, batch axis excluded.
        alpha_shape = list(input_shape[1:])
        broadcast_flags = [False] * len(alpha_shape)
        self.param_broadcast = broadcast_flags
        if self.shared_axes is not None:
            for axis in self.shared_axes:
                # Axes count from the batch axis, hence the -1 offset.
                alpha_shape[axis - 1] = 1
                broadcast_flags[axis - 1] = True
        self.alpha = self.add_weight(shape=alpha_shape,
                                     name='alpha',
                                     initializer=self.alpha_initializer,
                                     regularizer=self.alpha_regularizer,
                                     constraint=self.alpha_constraint)

        # Set input spec: pin the size of every non-shared axis.
        if self.shared_axes:
            pinned_axes = {axis: input_shape[axis]
                           for axis in range(1, len(input_shape))
                           if axis not in self.shared_axes}
        else:
            pinned_axes = {}
        self.input_spec = InputSpec(ndim=len(input_shape), axes=pinned_axes)
        self.built = True

    def call(self, inputs, mask=None):
        positive_part = K.relu(inputs)
        if K.backend() == 'theano':
            broadcast_alpha = K.pattern_broadcast(self.alpha,
                                                  self.param_broadcast)
            negative_part = (broadcast_alpha *
                             (inputs - K.abs(inputs)) * 0.5)
        else:
            negative_part = -self.alpha * K.relu(-inputs)
        return positive_part + negative_part

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        merged = dict(super(PReLU, self).get_config())
        merged.update({
            'alpha_initializer':
                initializers.serialize(self.alpha_initializer),
            'alpha_regularizer':
                regularizers.serialize(self.alpha_regularizer),
            'alpha_constraint':
                constraints.serialize(self.alpha_constraint),
            'shared_axes': self.shared_axes,
        })
        return merged
class ThresholdedReLU(Layer):
    """Thresholded Rectified Linear Unit.

    It follows:
    `f(x) = x for x > theta`,
    `f(x) = 0 otherwise`.

    # Input shape
        Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.

    # Output shape
        Same shape as the input.

    # Arguments
        theta: float >= 0. Threshold location of activation.

    # References
        - [Zero-Bias Autoencoders and the Benefits of Co-Adapting Features]
          (https://arxiv.org/abs/1402.3337)
    """

    def __init__(self, theta=1.0, **kwargs):
        super(ThresholdedReLU, self).__init__(**kwargs)
        self.supports_masking = True
        self.theta = K.cast_to_floatx(theta)

    def call(self, inputs, mask=None):
        """Zeroes every entry that is not strictly greater than `theta`.

        `mask` is accepted but not used.
        """
        above_threshold = K.greater(inputs, self.theta)
        keep = K.cast(above_threshold, K.floatx())
        return inputs * keep

    def get_config(self):
        """Returns the base layer config extended with `theta`."""
        config = {'theta': float(self.theta)}
        merged = dict(super(ThresholdedReLU, self).get_config())
        merged.update(config)
        return merged

    def compute_output_shape(self, input_shape):
        """The activation is element-wise: the shape is unchanged."""
        return input_shape
class ReLU(Layer):
    """Rectified Linear Unit activation function.

    With default values, it returns element-wise `max(x, 0)`.

    Otherwise, it follows:
    `f(x) = max_value` for `x >= max_value`,
    `f(x) = x` for `threshold <= x < max_value`,
    `f(x) = negative_slope * (x - threshold)` otherwise.

    # Input shape
        Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.

    # Output shape
        Same shape as the input.

    # Arguments
        max_value: float >= 0. Maximum activation value.
            `None` (the default) means no upper bound is passed
            to the backend.
        negative_slope: float >= 0. Negative slope coefficient.
        threshold: float. Threshold value for thresholded activation.

    # Raises
        ValueError: if `max_value` or `negative_slope` is negative.
    """

    def __init__(self, max_value=None, negative_slope=0.,
                 threshold=0., **kwargs):
        super(ReLU, self).__init__(**kwargs)
        if max_value is not None and max_value < 0.:
            raise ValueError('max_value of ReLU layer '
                             'cannot be negative value: %s' % str(max_value))
        if negative_slope < 0.:
            raise ValueError('negative_slope of ReLU layer cannot be '
                             'negative value: %s' % str(negative_slope))

        self.supports_masking = True
        # `None` must survive untouched: it is the "no cap" marker.
        if max_value is not None:
            max_value = K.cast_to_floatx(max_value)
        self.max_value = max_value
        self.negative_slope = K.cast_to_floatx(negative_slope)
        self.threshold = K.cast_to_floatx(threshold)

    def call(self, inputs):
        return K.relu(inputs,
                      alpha=self.negative_slope,
                      max_value=self.max_value,
                      threshold=self.threshold)

    def get_config(self):
        """Returns the base layer config extended with the ReLU settings.

        The stored attributes are the results of `K.cast_to_floatx`;
        they are converted back to plain Python floats here, exactly as
        `LeakyReLU`, `ELU` and `ThresholdedReLU` do for their own
        parameters, so the config only holds built-in types.
        """
        if self.max_value is None:
            max_value = None
        else:
            max_value = float(self.max_value)
        config = {
            'max_value': max_value,
            'negative_slope': float(self.negative_slope),
            'threshold': float(self.threshold)
        }
        base_config = super(ReLU, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        """The activation is element-wise: the shape is unchanged."""
        return input_shape
momentum: Momentum for the moving mean and the moving variance. epsilon: Small float added to variance to avoid dividing by zero. center: If True, add offset of `beta` to normalized tensor. If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the next layer is linear (also e.g. `nn.relu`), this can be disabled since the scaling will be done by the next layer. beta_initializer: Initializer for the beta weight. gamma_initializer: Initializer for the gamma weight. moving_mean_initializer: Initializer for the moving mean. moving_variance_initializer: Initializer for the moving variance. beta_regularizer: Optional regularizer for the beta weight. gamma_regularizer: Optional regularizer for the gamma weight. beta_constraint: Optional constraint for the beta weight. gamma_constraint: Optional constraint for the gamma weight. # Input shape Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. # Output shape Same shape as input. 
# References - [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](https://arxiv.org/abs/1502.03167) """ @interfaces.legacy_batchnorm_support def __init__(self, axis=-1, momentum=0.99, epsilon=1e-3, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, **kwargs): super(BatchNormalization, self).__init__(**kwargs) self.supports_masking = True self.axis = axis self.momentum = momentum self.epsilon = epsilon self.center = center self.scale = scale self.beta_initializer = initializers.get(beta_initializer) self.gamma_initializer = initializers.get(gamma_initializer) self.moving_mean_initializer = initializers.get(moving_mean_initializer) self.moving_variance_initializer = ( initializers.get(moving_variance_initializer)) self.beta_regularizer = regularizers.get(beta_regularizer) self.gamma_regularizer = regularizers.get(gamma_regularizer) self.beta_constraint = constraints.get(beta_constraint) self.gamma_constraint = constraints.get(gamma_constraint) def build(self, input_shape): dim = input_shape[self.axis] if dim is None: raise ValueError('Axis ' + str(self.axis) + ' of ' 'input tensor should have a defined dimension ' 'but the layer received an input with shape ' + str(input_shape) + '.') self.input_spec = InputSpec(ndim=len(input_shape), axes={self.axis: dim}) shape = (dim,) if self.scale: self.gamma = self.add_weight(shape=shape, name='gamma', initializer=self.gamma_initializer, regularizer=self.gamma_regularizer, constraint=self.gamma_constraint) else: self.gamma = None if self.center: self.beta = self.add_weight(shape=shape, name='beta', initializer=self.beta_initializer, regularizer=self.beta_regularizer, constraint=self.beta_constraint) else: self.beta = None self.moving_mean = self.add_weight( shape=shape, name='moving_mean', 
initializer=self.moving_mean_initializer, trainable=False) self.moving_variance = self.add_weight( shape=shape, name='moving_variance', initializer=self.moving_variance_initializer, trainable=False) self.built = True def call(self, inputs, training=None): input_shape = K.int_shape(inputs) # Prepare broadcasting shape. ndim = len(input_shape) reduction_axes = list(range(len(input_shape))) del reduction_axes[self.axis] broadcast_shape = [1] * len(input_shape) broadcast_shape[self.axis] = input_shape[self.axis] # Determines whether broadcasting is needed. needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1]) def normalize_inference(): if needs_broadcasting: # In this case we must explicitly broadcast all parameters. broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape) broadcast_moving_variance = K.reshape(self.moving_variance, broadcast_shape) if self.center: broadcast_beta = K.reshape(self.beta, broadcast_shape) else: broadcast_beta = None if self.scale: broadcast_gamma = K.reshape(self.gamma, broadcast_shape) else: broadcast_gamma = None return K.batch_normalization( inputs, broadcast_moving_mean, broadcast_moving_variance, broadcast_beta, broadcast_gamma, axis=self.axis, epsilon=self.epsilon) else: return K.batch_normalization( inputs, self.moving_mean, self.moving_variance, self.beta, self.gamma, axis=self.axis, epsilon=self.epsilon) # If the learning phase is *static* and set to inference: if training in {0, False}: return normalize_inference() # If the learning is either dynamic, or set to training: normed_training, mean, variance = K.normalize_batch_in_training( inputs, self.gamma, self.beta, reduction_axes, epsilon=self.epsilon) if K.backend() != 'cntk': sample_size = K.prod([K.shape(inputs)[axis] for axis in reduction_axes]) sample_size = K.cast(sample_size, dtype=K.dtype(inputs)) # sample variance - unbiased estimator of population variance variance *= sample_size / (sample_size - (1.0 + self.epsilon)) 
class _CuDNNRNN(RNN):
    """Private base class for CuDNNGRU and CuDNNLSTM.

    Subclasses are expected to provide a `cell` property exposing
    `state_size`, a `_process_batch(inputs, initial_state)` method, and
    the `kernel`, `recurrent_kernel` and `bias` weights.

    # Arguments
        return_sequences: Boolean. Whether to return the last output
            in the output sequence, or the full sequence.
        return_state: Boolean. Whether to return the last state
            in addition to the output.
        go_backwards: Boolean (default False). If True, the input is
            reversed along axis 1 (time) before being processed.
        stateful: Boolean (default False). If True, the last state
            for each sample at index i in a batch will be used as initial
            state for the sample of index i in the following batch.
    """

    def __init__(self,
                 return_sequences=False,
                 return_state=False,
                 go_backwards=False,
                 stateful=False,
                 **kwargs):
        if K.backend() != 'tensorflow':
            raise RuntimeError('CuDNN RNNs are only available '
                               'with the TensorFlow backend.')
        # Note: `super(RNN, ...)` skips `RNN.__init__` and runs the
        # initializer of the class that follows `RNN` in the MRO.
        super(RNN, self).__init__(**kwargs)
        self.return_sequences = return_sequences
        self.return_state = return_state
        self.go_backwards = go_backwards
        self.stateful = stateful
        self.supports_masking = False
        self.input_spec = [InputSpec(ndim=3)]
        # Normalize a scalar state size into a one-element list.
        state_sizes = (self.cell.state_size
                       if hasattr(self.cell.state_size, '__len__')
                       else [self.cell.state_size])
        self.state_spec = [InputSpec(shape=(None, size))
                           for size in state_sizes]
        self.constants_spec = None
        self._states = None
        self._num_constants = None

    def _canonical_to_params(self, weights, biases):
        """Flattens all weights, then all biases, into a single 1D tensor."""
        import tensorflow as tf
        flat_weights = [tf.reshape(w, (-1,)) for w in weights]
        flat_biases = [tf.reshape(b, (-1,)) for b in biases]
        return tf.concat(flat_weights + flat_biases, 0)

    def call(self, inputs, mask=None, training=None, initial_state=None):
        """Runs `_process_batch` on the inputs.

        # Arguments
            inputs: input tensor, or a list whose first entry is the
                input tensor and whose remaining entries are the
                initial states.
            mask: must be `None` (or a list whose first entry is `None`).
            training: accepted but not used here.
            initial_state: optional list of initial state tensors.

        # Returns
            The output, or `[output] + states` if `return_state` is set.

        # Raises
            ValueError: if a mask is passed, or if the number of initial
                states differs from the number of layer states.
        """
        if isinstance(mask, list):
            mask = mask[0]
        if mask is not None:
            raise ValueError('Masking is not supported for CuDNN RNNs.')

        # input shape: `(samples, time (padded with zeros), input_dim)`
        # note that the .build() method of subclasses MUST define
        # self.input_spec and self.state_spec with complete input shapes.
        if isinstance(inputs, list):
            # States packed with the inputs win over `initial_state`.
            inputs, initial_state = inputs[0], inputs[1:]
        elif initial_state is None:
            if self.stateful:
                initial_state = self.states
            else:
                initial_state = self.get_initial_state(inputs)

        if len(initial_state) != len(self.states):
            raise ValueError('Layer has ' + str(len(self.states)) +
                             ' states but was passed ' +
                             str(len(initial_state)) +
                             ' initial states.')

        if self.go_backwards:
            # Reverse time axis.
            inputs = K.reverse(inputs, 1)
        output, states = self._process_batch(inputs, initial_state)

        if self.stateful:
            # Carry the final states over to the next batch.
            updates = [(self.states[idx], new_state)
                       for idx, new_state in enumerate(states)]
            self.add_update(updates, inputs)

        return [output] + states if self.return_state else output

    def get_config(self):
        """Returns the config of the class after `RNN` in the MRO,
        extended with the four constructor flags of this class."""
        config = {'return_sequences': self.return_sequences,
                  'return_state': self.return_state,
                  'go_backwards': self.go_backwards,
                  'stateful': self.stateful}
        merged = dict(super(RNN, self).get_config())
        merged.update(config)
        return merged

    @classmethod
    def from_config(cls, config):
        return cls(**config)

    @property
    def trainable_weights(self):
        if not self.trainable or not self.built:
            return []
        return [self.kernel, self.recurrent_kernel, self.bias]

    @property
    def non_trainable_weights(self):
        if self.trainable or not self.built:
            return []
        return [self.kernel, self.recurrent_kernel, self.bias]

    @property
    def losses(self):
        # Bypass any override defined on `RNN` itself.
        return super(RNN, self).losses

    def get_losses_for(self, inputs=None):
        # Bypass any override defined on `RNN` itself.
        return super(RNN, self).get_losses_for(inputs=inputs)
bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the `kernel` weights matrix (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to the `recurrent_kernel` weights matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). return_sequences: Boolean. Whether to return the last output. in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. """ def __init__(self, units, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, return_sequences=False, return_state=False, stateful=False, **kwargs): self.units = units super(CuDNNGRU, self).__init__( return_sequences=return_sequences, return_state=return_state, stateful=stateful, **kwargs) self.kernel_initializer = initializers.get(kernel_initializer) self.recurrent_initializer = initializers.get(recurrent_initializer) self.bias_initializer = initializers.get(bias_initializer) self.kernel_regularizer = regularizers.get(kernel_regularizer) self.recurrent_regularizer = regularizers.get(recurrent_regularizer) self.bias_regularizer = regularizers.get(bias_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) self.kernel_constraint = constraints.get(kernel_constraint) 
self.recurrent_constraint = constraints.get(recurrent_constraint) self.bias_constraint = constraints.get(bias_constraint) @property def cell(self): Cell = namedtuple('cell', 'state_size') cell = Cell(state_size=self.units) return cell def build(self, input_shape): super(CuDNNGRU, self).build(input_shape) if isinstance(input_shape, list): input_shape = input_shape[0] input_dim = input_shape[-1] from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops self._cudnn_gru = cudnn_rnn_ops.CudnnGRU( num_layers=1, num_units=self.units, input_size=input_dim, input_mode='linear_input') self.kernel = self.add_weight(shape=(input_dim, self.units * 3), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) self.recurrent_kernel = self.add_weight( shape=(self.units, self.units * 3), name='recurrent_kernel', initializer=self.recurrent_initializer, regularizer=self.recurrent_regularizer, constraint=self.recurrent_constraint) self.bias = self.add_weight(shape=(self.units * 6,), name='bias', initializer=self.bias_initializer, regularizer=self.bias_regularizer, constraint=self.bias_constraint) self.kernel_z = self.kernel[:, :self.units] self.recurrent_kernel_z = self.recurrent_kernel[:, :self.units] self.kernel_r = self.kernel[:, self.units: self.units * 2] self.recurrent_kernel_r = self.recurrent_kernel[:, self.units: self.units * 2] self.kernel_h = self.kernel[:, self.units * 2:] self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2:] self.bias_z_i = self.bias[:self.units] self.bias_r_i = self.bias[self.units: self.units * 2] self.bias_h_i = self.bias[self.units * 2: self.units * 3] self.bias_z = self.bias[self.units * 3: self.units * 4] self.bias_r = self.bias[self.units * 4: self.units * 5] self.bias_h = self.bias[self.units * 5:] self.built = True def _process_batch(self, inputs, initial_state): import tensorflow as tf inputs = tf.transpose(inputs, (1, 0, 2)) input_h = initial_state[0] 
class CuDNNLSTM(_CuDNNRNN):
    """Fast LSTM implementation with [CuDNN](https://developer.nvidia.com/cudnn).

    Can only be run on GPU, with the TensorFlow backend.

    # Arguments
        units: Positive integer, dimensionality of the output space.
        kernel_initializer: Initializer for the `kernel` weights matrix,
            used for the linear transformation of the inputs.
            (see [initializers](../initializers.md)).
        recurrent_initializer: Initializer for the `recurrent_kernel`
            weights matrix,
            used for the linear transformation of the recurrent state.
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        unit_forget_bias: Boolean.
            If True, the `bias_f` slice of the bias vector
            (`bias[units * 5: units * 6]`) is initialized to ones;
            the rest of the vector still uses `bias_initializer`.
            This is recommended in [Jozefowicz et al.]
            (http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        recurrent_regularizer: Regularizer function applied to
            the `recurrent_kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer (its "activation").
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to
            the `kernel` weights matrix
            (see [constraints](../constraints.md)).
        recurrent_constraint: Constraint function applied to
            the `recurrent_kernel` weights matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).
        return_sequences: Boolean. Whether to return the last output
            in the output sequence, or the full sequence.
        return_state: Boolean. Whether to return the last state
            in addition to the output.
        stateful: Boolean (default False). If True, the last state
            for each sample at index i in a batch will be used as initial
            state for the sample of index i in the following batch.
    """

    def __init__(self, units,
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 unit_forget_bias=True,
                 kernel_regularizer=None,
                 recurrent_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 recurrent_constraint=None,
                 bias_constraint=None,
                 return_sequences=False,
                 return_state=False,
                 stateful=False,
                 **kwargs):
        # `units` must be set first: the parent constructor reads the
        # `cell` property, which depends on it.
        self.units = units
        super(CuDNNLSTM, self).__init__(
            return_sequences=return_sequences,
            return_state=return_state,
            stateful=stateful,
            **kwargs)

        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.unit_forget_bias = unit_forget_bias

        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.kernel_constraint = constraints.get(kernel_constraint)
        self.recurrent_constraint = constraints.get(recurrent_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

    @property
    def cell(self):
        """Lightweight stand-in exposing only `state_size` (h and c)."""
        return namedtuple('cell', 'state_size')(
            state_size=(self.units, self.units))

    def build(self, input_shape):
        """Creates the CuDNN op, the weights and their per-gate slices.

        # Arguments
            input_shape: shape of the input, or a list of shapes whose
                first entry is the shape of the input.
        """
        super(CuDNNLSTM, self).build(input_shape)
        if isinstance(input_shape, list):
            input_shape = input_shape[0]
        input_dim = input_shape[-1]
        u = self.units

        from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
        self._cudnn_lstm = cudnn_rnn_ops.CudnnLSTM(
            num_layers=1,
            num_units=u,
            input_size=input_dim,
            input_mode='linear_input')

        self.kernel = self.add_weight(shape=(input_dim, u * 4),
                                      name='kernel',
                                      initializer=self.kernel_initializer,
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        self.recurrent_kernel = self.add_weight(
            shape=(u, u * 4),
            name='recurrent_kernel',
            initializer=self.recurrent_initializer,
            regularizer=self.recurrent_regularizer,
            constraint=self.recurrent_constraint)

        bias_initializer = self.bias_initializer
        if self.unit_forget_bias:
            def bias_initializer(shape, *args, **kwargs):
                # Ones land on the slice later exposed as `bias_f`;
                # the requested `shape` itself is not consulted.
                return K.concatenate([
                    self.bias_initializer((self.units * 5,), *args, **kwargs),
                    initializers.Ones()((self.units,), *args, **kwargs),
                    self.bias_initializer((self.units * 2,), *args, **kwargs),
                ])
        self.bias = self.add_weight(shape=(u * 8,),
                                    name='bias',
                                    initializer=bias_initializer,
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)

        # Per-gate column slices of the input kernel (order: i, f, c, o).
        self.kernel_i = self.kernel[:, :u]
        self.kernel_f = self.kernel[:, u: u * 2]
        self.kernel_c = self.kernel[:, u * 2: u * 3]
        self.kernel_o = self.kernel[:, u * 3:]

        # Per-gate column slices of the recurrent kernel.
        self.recurrent_kernel_i = self.recurrent_kernel[:, :u]
        self.recurrent_kernel_f = self.recurrent_kernel[:, u: u * 2]
        self.recurrent_kernel_c = self.recurrent_kernel[:, u * 2: u * 3]
        self.recurrent_kernel_o = self.recurrent_kernel[:, u * 3:]

        # First half of the bias vector: the `*_i` biases.
        self.bias_i_i = self.bias[:u]
        self.bias_f_i = self.bias[u: u * 2]
        self.bias_c_i = self.bias[u * 2: u * 3]
        self.bias_o_i = self.bias[u * 3: u * 4]
        # Second half of the bias vector.
        self.bias_i = self.bias[u * 4: u * 5]
        self.bias_f = self.bias[u * 5: u * 6]
        self.bias_c = self.bias[u * 6: u * 7]
        self.bias_o = self.bias[u * 7:]

        self.built = True

    def _process_batch(self, inputs, initial_state):
        """Runs the CuDNN LSTM op over one batch.

        # Arguments
            inputs: input tensor; its first two axes are swapped before
                it is handed to the CuDNN op.
            initial_state: sequence holding the initial `h` and `c`.

        # Returns
            Tuple `(output, [h, c])`.
        """
        import tensorflow as tf
        time_major = tf.transpose(inputs, (1, 0, 2))
        # The op takes its states with an extra leading axis.
        h0 = tf.expand_dims(initial_state[0], axis=0)
        c0 = tf.expand_dims(initial_state[1], axis=0)

        kernels = [
            self.kernel_i,
            self.kernel_f,
            self.kernel_c,
            self.kernel_o,
            self.recurrent_kernel_i,
            self.recurrent_kernel_f,
            self.recurrent_kernel_c,
            self.recurrent_kernel_o,
        ]
        bias_parts = [
            self.bias_i_i,
            self.bias_f_i,
            self.bias_c_i,
            self.bias_o_i,
            self.bias_i,
            self.bias_f,
            self.bias_c,
            self.bias_o,
        ]
        params = self._canonical_to_params(weights=kernels,
                                           biases=bias_parts)
        outputs, h, c = self._cudnn_lstm(
            time_major,
            input_h=h0,
            input_c=c0,
            params=params,
            is_training=True)

        if self.stateful or self.return_state:
            # Drop the leading axis added above.
            h = h[0]
            c = c[0]
        if not self.return_sequences:
            output = outputs[-1]
        else:
            output = tf.transpose(outputs, (1, 0, 2))
        return output, [h, c]

    def get_config(self):
        """Returns the parent config extended with the LSTM settings."""
        config = {
            'units': self.units,
            'kernel_initializer':
                initializers.serialize(self.kernel_initializer),
            'recurrent_initializer':
                initializers.serialize(self.recurrent_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'unit_forget_bias': self.unit_forget_bias,
            'kernel_regularizer':
                regularizers.serialize(self.kernel_regularizer),
            'recurrent_regularizer':
                regularizers.serialize(self.recurrent_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'activity_regularizer':
                regularizers.serialize(self.activity_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'recurrent_constraint':
                constraints.serialize(self.recurrent_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)}
        merged = dict(super(CuDNNLSTM, self).get_config())
        merged.update(config)
        return merged
import constraints from .recurrent import _generate_dropout_mask from .recurrent import _standardize_args import numpy as np import warnings from ..engine.base_layer import InputSpec, Layer from ..utils import conv_utils from ..legacy import interfaces from ..legacy.layers import Recurrent, ConvRecurrent2D from .recurrent import RNN from ..utils.generic_utils import has_arg from ..utils.generic_utils import to_list from ..utils.generic_utils import transpose_shape class ConvRNN2D(RNN): """Base class for convolutional-recurrent layers. # Arguments cell: A RNN cell instance. A RNN cell is a class that has: - a `call(input_at_t, states_at_t)` method, returning `(output_at_t, states_at_t_plus_1)`. The call method of the cell can also take the optional argument `constants`, see section "Note on passing external constants" below. - a `state_size` attribute. This can be a single integer (single state) in which case it is the number of channels of the recurrent state (which should be the same as the number of channels of the cell output). This can also be a list/tuple of integers (one size per state). In this case, the first entry (`state_size[0]`) should be the same as the size of the cell output. return_sequences: Boolean. Whether to return the last output. in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence. stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. input_shape: Use this argument to specify the shape of the input when this layer is the first one in a model. 
# Input shape 5D tensor with shape: `(samples, timesteps, channels, rows, cols)` if data_format='channels_first' or 5D tensor with shape: `(samples, timesteps, rows, cols, channels)` if data_format='channels_last'. # Output shape - if `return_state`: a list of tensors. The first tensor is the output. The remaining tensors are the last states, each 5D tensor with shape: `(samples, timesteps, filters, new_rows, new_cols)` if data_format='channels_first' or 5D tensor with shape: `(samples, timesteps, new_rows, new_cols, filters)` if data_format='channels_last'. `rows` and `cols` values might have changed due to padding. - if `return_sequences`: 5D tensor with shape: `(samples, timesteps, filters, new_rows, new_cols)` if data_format='channels_first' or 5D tensor with shape: `(samples, timesteps, new_rows, new_cols, filters)` if data_format='channels_last'. - else, 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if data_format='channels_first' or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. # Masking This layer supports masking for input data with a variable number of timesteps. To introduce masks to your data, use an [Embedding](embeddings.md) layer with the `mask_zero` parameter set to `True`. # Note on using statefulness in RNNs You can set RNN layers to be 'stateful', which means that the states computed for the samples in one batch will be reused as initial states for the samples in the next batch. This assumes a one-to-one mapping between samples in different successive batches. To enable statefulness: - specify `stateful=True` in the layer constructor. - specify a fixed batch size for your model, by passing - if sequential model: `batch_input_shape=(...)` to the first layer in your model. - if functional model with 1 or more Input layers: `batch_shape=(...)` to all the first layers in your model. This is the expected shape of your inputs *including the batch size*. 
It should be a tuple of integers, e.g. `(32, 10, 100, 100, 32)`. Note that the number of rows and columns should be specified too. - specify `shuffle=False` when calling fit(). To reset the states of your model, call `.reset_states()` on either a specific layer, or on your entire model. # Note on specifying the initial state of RNNs You can specify the initial state of RNN layers symbolically by calling them with the keyword argument `initial_state`. The value of `initial_state` should be a tensor or list of tensors representing the initial state of the RNN layer. You can specify the initial state of RNN layers numerically by calling `reset_states` with the keyword argument `states`. The value of `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. # Note on passing external constants to RNNs You can pass "external" constants to the cell using the `constants` keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This requires that the `cell.call` method accepts the same keyword argument `constants`. Such constants can be used to condition the cell transformation on additional static inputs (not changing over time), a.k.a. an attention mechanism. """ def __init__(self, cell, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False, **kwargs): if unroll: raise TypeError('Unrolling isn\'t possible with ' 'convolutional RNNs.') if isinstance(cell, (list, tuple)): # The StackedConvRNN2DCells isn't implemented yet. 
raise TypeError('It is not possible at the moment to' 'stack convolutional cells.') super(ConvRNN2D, self).__init__(cell, return_sequences, return_state, go_backwards, stateful, unroll, **kwargs) self.input_spec = [InputSpec(ndim=5)] def compute_output_shape(self, input_shape): if isinstance(input_shape, list): input_shape = input_shape[0] cell = self.cell if cell.data_format == 'channels_first': rows = input_shape[3] cols = input_shape[4] elif cell.data_format == 'channels_last': rows = input_shape[2] cols = input_shape[3] rows = conv_utils.conv_output_length(rows, cell.kernel_size[0], padding=cell.padding, stride=cell.strides[0], dilation=cell.dilation_rate[0]) cols = conv_utils.conv_output_length(cols, cell.kernel_size[1], padding=cell.padding, stride=cell.strides[1], dilation=cell.dilation_rate[1]) output_shape = input_shape[:2] + (rows, cols, cell.filters) output_shape = transpose_shape(output_shape, cell.data_format, spatial_axes=(2, 3)) if not self.return_sequences: output_shape = output_shape[:1] + output_shape[2:] if self.return_state: output_shape = [output_shape] base = (input_shape[0], rows, cols, cell.filters) base = transpose_shape(base, cell.data_format, spatial_axes=(1, 2)) output_shape += [base[:] for _ in range(2)] return output_shape def build(self, input_shape): # Note input_shape will be list of shapes of initial states and # constants if these are passed in __call__. 
if self._num_constants is not None: constants_shape = input_shape[-self._num_constants:] else: constants_shape = None if isinstance(input_shape, list): input_shape = input_shape[0] batch_size = input_shape[0] if self.stateful else None self.input_spec[0] = InputSpec(shape=(batch_size, None) + input_shape[2:5]) # allow cell (if layer) to build before we set or validate state_spec if isinstance(self.cell, Layer): step_input_shape = (input_shape[0],) + input_shape[2:] if constants_shape is not None: self.cell.build([step_input_shape] + constants_shape) else: self.cell.build(step_input_shape) # set or validate state_spec if hasattr(self.cell.state_size, '__len__'): state_size = list(self.cell.state_size) else: state_size = [self.cell.state_size] if self.state_spec is not None: # initial_state was passed in call, check compatibility if self.cell.data_format == 'channels_first': ch_dim = 1 elif self.cell.data_format == 'channels_last': ch_dim = 3 if not [spec.shape[ch_dim] for spec in self.state_spec] == state_size: raise ValueError( 'An initial_state was passed that is not compatible with ' '`cell.state_size`. 
Received `state_spec`={}; ' 'However `cell.state_size` is ' '{}'.format([spec.shape for spec in self.state_spec], self.cell.state_size)) else: if self.cell.data_format == 'channels_first': self.state_spec = [InputSpec(shape=(None, dim, None, None)) for dim in state_size] elif self.cell.data_format == 'channels_last': self.state_spec = [InputSpec(shape=(None, None, None, dim)) for dim in state_size] if self.stateful: self.reset_states() self.built = True def get_initial_state(self, inputs): # (samples, timesteps, rows, cols, filters) initial_state = K.zeros_like(inputs) # (samples, rows, cols, filters) initial_state = K.sum(initial_state, axis=1) shape = list(self.cell.kernel_shape) shape[-1] = self.cell.filters initial_state = self.cell.input_conv(initial_state, K.zeros(tuple(shape)), padding=self.cell.padding) # Fix for Theano because it needs # K.int_shape to work in call() with initial_state. keras_shape = list(K.int_shape(inputs)) keras_shape.pop(1) if K.image_data_format() == 'channels_first': indices = 2, 3 else: indices = 1, 2 for i, j in enumerate(indices): keras_shape[j] = conv_utils.conv_output_length( keras_shape[j], shape[i], padding=self.cell.padding, stride=self.cell.strides[i], dilation=self.cell.dilation_rate[i]) initial_state._keras_shape = keras_shape if hasattr(self.cell.state_size, '__len__'): return [initial_state for _ in self.cell.state_size] else: return [initial_state] def __call__(self, inputs, initial_state=None, constants=None, **kwargs): inputs, initial_state, constants = _standardize_args( inputs, initial_state, constants, self._num_constants) if initial_state is None and constants is None: return super(ConvRNN2D, self).__call__(inputs, **kwargs) # If any of `initial_state` or `constants` are specified and are Keras # tensors, then add them to the inputs and temporarily modify the # input_spec to include them. 
additional_inputs = [] additional_specs = [] if initial_state is not None: kwargs['initial_state'] = initial_state additional_inputs += initial_state self.state_spec = [] for state in initial_state: try: shape = K.int_shape(state) # Fix for Theano except TypeError: shape = tuple(None for _ in range(K.ndim(state))) self.state_spec.append(InputSpec(shape=shape)) additional_specs += self.state_spec if constants is not None: kwargs['constants'] = constants additional_inputs += constants self.constants_spec = [InputSpec(shape=K.int_shape(constant)) for constant in constants] self._num_constants = len(constants) additional_specs += self.constants_spec # at this point additional_inputs cannot be empty for tensor in additional_inputs: if K.is_keras_tensor(tensor) != K.is_keras_tensor(additional_inputs[0]): raise ValueError('The initial state or constants of an RNN' ' layer cannot be specified with a mix of' ' Keras tensors and non-Keras tensors') if K.is_keras_tensor(additional_inputs[0]): # Compute the full input spec, including state and constants full_input = [inputs] + additional_inputs full_input_spec = self.input_spec + additional_specs # Perform the call with temporarily replaced input_spec original_input_spec = self.input_spec self.input_spec = full_input_spec output = super(ConvRNN2D, self).__call__(full_input, **kwargs) self.input_spec = original_input_spec return output else: return super(ConvRNN2D, self).__call__(inputs, **kwargs) def call(self, inputs, mask=None, training=None, initial_state=None, constants=None): # note that the .build() method of subclasses MUST define # self.input_spec and self.state_spec with complete input shapes. 
if isinstance(inputs, list): inputs = inputs[0] if initial_state is not None: pass elif self.stateful: initial_state = self.states else: initial_state = self.get_initial_state(inputs) if isinstance(mask, list): mask = mask[0] if len(initial_state) != len(self.states): raise ValueError('Layer has ' + str(len(self.states)) + ' states but was passed ' + str(len(initial_state)) + ' initial states.') timesteps = K.int_shape(inputs)[1] kwargs = {} if has_arg(self.cell.call, 'training'): kwargs['training'] = training if constants: if not has_arg(self.cell.call, 'constants'): raise ValueError('RNN cell does not support constants') def step(inputs, states): constants = states[-self._num_constants:] states = states[:-self._num_constants] return self.cell.call(inputs, states, constants=constants, **kwargs) else: def step(inputs, states): return self.cell.call(inputs, states, **kwargs) last_output, outputs, states = K.rnn(step, inputs, initial_state, constants=constants, go_backwards=self.go_backwards, mask=mask, input_length=timesteps) if self.stateful: updates = [] for i in range(len(states)): updates.append((self.states[i], states[i])) self.add_update(updates, inputs) if self.return_sequences: output = outputs else: output = last_output # Properly set learning phase if getattr(last_output, '_uses_learning_phase', False): output._uses_learning_phase = True if self.return_state: states = to_list(states, allow_tuple=True) return [output] + states else: return output def reset_states(self, states=None): if not self.stateful: raise AttributeError('Layer must be stateful.') input_shape = self.input_spec[0].shape state_shape = self.compute_output_shape(input_shape) if self.return_state: state_shape = state_shape[0] if self.return_sequences: state_shape = state_shape[:1] + state_shape[2:] if None in state_shape: raise ValueError('If a RNN is stateful, it needs to know ' 'its batch size. 
Specify the batch size ' 'of your input tensors: \n' '- If using a Sequential model, ' 'specify the batch size by passing ' 'a `batch_input_shape` ' 'argument to your first layer.\n' '- If using the functional API, specify ' 'the time dimension by passing a ' '`batch_shape` argument to your Input layer.\n' 'The same thing goes for the number of rows ' 'and columns.') # helper function def get_tuple_shape(nb_channels): result = list(state_shape) if self.cell.data_format == 'channels_first': result[1] = nb_channels elif self.cell.data_format == 'channels_last': result[3] = nb_channels else: raise KeyError return tuple(result) # initialize state if None if self.states[0] is None: if hasattr(self.cell.state_size, '__len__'): self.states = [K.zeros(get_tuple_shape(dim)) for dim in self.cell.state_size] else: self.states = [K.zeros(get_tuple_shape(self.cell.state_size))] elif states is None: if hasattr(self.cell.state_size, '__len__'): for state, dim in zip(self.states, self.cell.state_size): K.set_value(state, np.zeros(get_tuple_shape(dim))) else: K.set_value(self.states[0], np.zeros(get_tuple_shape(self.cell.state_size))) else: states = to_list(states, allow_tuple=True) if len(states) != len(self.states): raise ValueError('Layer ' + self.name + ' expects ' + str(len(self.states)) + ' states, ' 'but it received ' + str(len(states)) + ' state values. Input received: ' + str(states)) for index, (value, state) in enumerate(zip(states, self.states)): if hasattr(self.cell.state_size, '__len__'): dim = self.cell.state_size[index] else: dim = self.cell.state_size if value.shape != get_tuple_shape(dim): raise ValueError('State ' + str(index) + ' is incompatible with layer ' + self.name + ': expected shape=' + str(get_tuple_shape(dim)) + ', found shape=' + str(value.shape)) # TODO: consider batch calls to `set_value`. K.set_value(state, value) class ConvLSTM2DCell(Layer): """Cell class for the ConvLSTM2D layer. 
# Arguments filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of n integers, specifying the dimensions of the convolution window. strides: An integer or tuple/list of n integers, specifying the strides of the convolution. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). data_format: A string, one of `"channels_last"` (default) or `"channels_first"`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be `"channels_last"`. dilation_rate: An integer or tuple/list of n integers, specifying the dilation rate to use for dilated convolution. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any `strides` value != 1. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step (see [activations](../activations.md)). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, used for the linear transformation of the recurrent state. (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at initialization. Use in combination with `bias_initializer="zeros"`. This is recommended in [Jozefowicz et al.] (http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf). 
kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to the `recurrent_kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the `kernel` weights matrix (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to the `recurrent_kernel` weights matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. """ def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., **kwargs): super(ConvLSTM2DCell, self).__init__(**kwargs) self.filters = filters self.kernel_size = conv_utils.normalize_tuple(kernel_size, 2, 'kernel_size') self.strides = conv_utils.normalize_tuple(strides, 2, 'strides') self.padding = conv_utils.normalize_padding(padding) self.data_format = K.normalize_data_format(data_format) self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, 2, 'dilation_rate') self.activation = activations.get(activation) self.recurrent_activation = activations.get(recurrent_activation) self.use_bias = use_bias 
self.kernel_initializer = initializers.get(kernel_initializer) self.recurrent_initializer = initializers.get(recurrent_initializer) self.bias_initializer = initializers.get(bias_initializer) self.unit_forget_bias = unit_forget_bias self.kernel_regularizer = regularizers.get(kernel_regularizer) self.recurrent_regularizer = regularizers.get(recurrent_regularizer) self.bias_regularizer = regularizers.get(bias_regularizer) self.kernel_constraint = constraints.get(kernel_constraint) self.recurrent_constraint = constraints.get(recurrent_constraint) self.bias_constraint = constraints.get(bias_constraint) if K.backend() == 'theano' and (dropout or recurrent_dropout): warnings.warn( 'RNN dropout is no longer supported with the Theano backend ' 'due to technical limitations. ' 'You can either set `dropout` and `recurrent_dropout` to 0, ' 'or use the TensorFlow backend.') dropout = 0. recurrent_dropout = 0. self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) self.state_size = (self.filters, self.filters) self._dropout_mask = None self._recurrent_dropout_mask = None def build(self, input_shape): if self.data_format == 'channels_first': channel_axis = 1 else: channel_axis = -1 if input_shape[channel_axis] is None: raise ValueError('The channel dimension of the inputs ' 'should be defined. 
Found `None`.') input_dim = input_shape[channel_axis] kernel_shape = self.kernel_size + (input_dim, self.filters * 4) self.kernel_shape = kernel_shape recurrent_kernel_shape = self.kernel_size + (self.filters, self.filters * 4) self.kernel = self.add_weight(shape=kernel_shape, initializer=self.kernel_initializer, name='kernel', regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) self.recurrent_kernel = self.add_weight( shape=recurrent_kernel_shape, initializer=self.recurrent_initializer, name='recurrent_kernel', regularizer=self.recurrent_regularizer, constraint=self.recurrent_constraint) if self.use_bias: if self.unit_forget_bias: def bias_initializer(_, *args, **kwargs): return K.concatenate([ self.bias_initializer((self.filters,), *args, **kwargs), initializers.Ones()((self.filters,), *args, **kwargs), self.bias_initializer((self.filters * 2,), *args, **kwargs), ]) else: bias_initializer = self.bias_initializer self.bias = self.add_weight(shape=(self.filters * 4,), name='bias', initializer=bias_initializer, regularizer=self.bias_regularizer, constraint=self.bias_constraint) else: self.bias = None self.kernel_i = self.kernel[:, :, :, :self.filters] self.recurrent_kernel_i = self.recurrent_kernel[:, :, :, :self.filters] self.kernel_f = self.kernel[:, :, :, self.filters: self.filters * 2] self.recurrent_kernel_f = ( self.recurrent_kernel[:, :, :, self.filters: self.filters * 2]) self.kernel_c = self.kernel[:, :, :, self.filters * 2: self.filters * 3] self.recurrent_kernel_c = ( self.recurrent_kernel[:, :, :, self.filters * 2: self.filters * 3]) self.kernel_o = self.kernel[:, :, :, self.filters * 3:] self.recurrent_kernel_o = self.recurrent_kernel[:, :, :, self.filters * 3:] if self.use_bias: self.bias_i = self.bias[:self.filters] self.bias_f = self.bias[self.filters: self.filters * 2] self.bias_c = self.bias[self.filters * 2: self.filters * 3] self.bias_o = self.bias[self.filters * 3:] else: self.bias_i = None self.bias_f = None self.bias_c = 
None self.bias_o = None self.built = True def call(self, inputs, states, training=None): if 0 < self.dropout < 1 and self._dropout_mask is None: self._dropout_mask = _generate_dropout_mask( K.ones_like(inputs), self.dropout, training=training, count=4) if (0 < self.recurrent_dropout < 1 and self._recurrent_dropout_mask is None): self._recurrent_dropout_mask = _generate_dropout_mask( K.ones_like(states[1]), self.recurrent_dropout, training=training, count=4) # dropout matrices for input units dp_mask = self._dropout_mask # dropout matrices for recurrent units rec_dp_mask = self._recurrent_dropout_mask h_tm1 = states[0] # previous memory state c_tm1 = states[1] # previous carry state if 0 < self.dropout < 1.: inputs_i = inputs * dp_mask[0] inputs_f = inputs * dp_mask[1] inputs_c = inputs * dp_mask[2] inputs_o = inputs * dp_mask[3] else: inputs_i = inputs inputs_f = inputs inputs_c = inputs inputs_o = inputs if 0 < self.recurrent_dropout < 1.: h_tm1_i = h_tm1 * rec_dp_mask[0] h_tm1_f = h_tm1 * rec_dp_mask[1] h_tm1_c = h_tm1 * rec_dp_mask[2] h_tm1_o = h_tm1 * rec_dp_mask[3] else: h_tm1_i = h_tm1 h_tm1_f = h_tm1 h_tm1_c = h_tm1 h_tm1_o = h_tm1 x_i = self.input_conv(inputs_i, self.kernel_i, self.bias_i, padding=self.padding) x_f = self.input_conv(inputs_f, self.kernel_f, self.bias_f, padding=self.padding) x_c = self.input_conv(inputs_c, self.kernel_c, self.bias_c, padding=self.padding) x_o = self.input_conv(inputs_o, self.kernel_o, self.bias_o, padding=self.padding) h_i = self.recurrent_conv(h_tm1_i, self.recurrent_kernel_i) h_f = self.recurrent_conv(h_tm1_f, self.recurrent_kernel_f) h_c = self.recurrent_conv(h_tm1_c, self.recurrent_kernel_c) h_o = self.recurrent_conv(h_tm1_o, self.recurrent_kernel_o) i = self.recurrent_activation(x_i + h_i) f = self.recurrent_activation(x_f + h_f) c = f * c_tm1 + i * self.activation(x_c + h_c) o = self.recurrent_activation(x_o + h_o) h = o * self.activation(c) if 0 < self.dropout + self.recurrent_dropout: if training is None: 
h._uses_learning_phase = True return h, [h, c] def input_conv(self, x, w, b=None, padding='valid'): conv_out = K.conv2d(x, w, strides=self.strides, padding=padding, data_format=self.data_format, dilation_rate=self.dilation_rate) if b is not None: conv_out = K.bias_add(conv_out, b, data_format=self.data_format) return conv_out def recurrent_conv(self, x, w): conv_out = K.conv2d(x, w, strides=(1, 1), padding='same', data_format=self.data_format) return conv_out def get_config(self): config = {'filters': self.filters, 'kernel_size': self.kernel_size, 'strides': self.strides, 'padding': self.padding, 'data_format': self.data_format, 'dilation_rate': self.dilation_rate, 'activation': activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), 'use_bias': self.use_bias, 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 'bias_initializer': initializers.serialize(self.bias_initializer), 'unit_forget_bias': self.unit_forget_bias, 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 'bias_constraint': constraints.serialize(self.bias_constraint), 'dropout': self.dropout, 'recurrent_dropout': self.recurrent_dropout} base_config = super(ConvLSTM2DCell, self).get_config() return dict(list(base_config.items()) + list(config.items())) class ConvLSTM2D(ConvRNN2D): """Convolutional LSTM. It is similar to an LSTM layer, but the input transformations and recurrent transformations are both convolutional. # Arguments filters: Integer, the dimensionality of the output space (i.e. the number output of filters in the convolution). 
kernel_size: An integer or tuple/list of n integers, specifying the dimensions of the convolution window. strides: An integer or tuple/list of n integers, specifying the strides of the convolution. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). data_format: A string, one of `"channels_last"` (default) or `"channels_first"`. The ordering of the dimensions in the inputs. `"channels_last"` corresponds to inputs with shape `(batch, time, ..., channels)` while `"channels_first"` corresponds to inputs with shape `(batch, time, channels, ...)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be `"channels_last"`. dilation_rate: An integer or tuple/list of n integers, specifying the dilation rate to use for dilated convolution. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any `strides` value != 1. activation: Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step (see [activations](../activations.md)). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, used for the linear transformation of the recurrent state. (see [initializers](../initializers.md)). bias_initializer: Initializer for the bias vector (see [initializers](../initializers.md)). unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at initialization. Use in combination with `bias_initializer="zeros"`. This is recommended in [Jozefowicz et al.] 
(http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf). kernel_regularizer: Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to the `recurrent_kernel` weights matrix (see [regularizer](../regularizers.md)). bias_regularizer: Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to the output of the layer (its "activation"). (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to the `kernel` weights matrix (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to the `recurrent_kernel` weights matrix (see [constraints](../constraints.md)). bias_constraint: Constraint function applied to the bias vector (see [constraints](../constraints.md)). return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. go_backwards: Boolean (default False). If True, process the input sequence backwards. stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. 
# Input shape - if data_format='channels_first' 5D tensor with shape: `(samples, time, channels, rows, cols)` - if data_format='channels_last' 5D tensor with shape: `(samples, time, rows, cols, channels)` # Output shape - if `return_sequences` - if data_format='channels_first' 5D tensor with shape: `(samples, time, filters, output_row, output_col)` - if data_format='channels_last' 5D tensor with shape: `(samples, time, output_row, output_col, filters)` - else - if data_format='channels_first' 4D tensor with shape: `(samples, filters, output_row, output_col)` - if data_format='channels_last' 4D tensor with shape: `(samples, output_row, output_col, filters)` where o_row and o_col depend on the shape of the filter and the padding # Raises ValueError: in case of invalid constructor arguments. # References - [Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting](http://arxiv.org/abs/1506.04214v1) The current implementation does not include the feedback loop on the cells output """ @interfaces.legacy_convlstm2d_support def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, return_sequences=False, go_backwards=False, stateful=False, dropout=0., recurrent_dropout=0., **kwargs): cell = ConvLSTM2DCell(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, recurrent_activation=recurrent_activation, use_bias=use_bias, kernel_initializer=kernel_initializer, recurrent_initializer=recurrent_initializer, 
bias_initializer=bias_initializer, unit_forget_bias=unit_forget_bias, kernel_regularizer=kernel_regularizer, recurrent_regularizer=recurrent_regularizer, bias_regularizer=bias_regularizer, kernel_constraint=kernel_constraint, recurrent_constraint=recurrent_constraint, bias_constraint=bias_constraint, dropout=dropout, recurrent_dropout=recurrent_dropout) super(ConvLSTM2D, self).__init__(cell, return_sequences=return_sequences, go_backwards=go_backwards, stateful=stateful, **kwargs) self.activity_regularizer = regularizers.get(activity_regularizer) def call(self, inputs, mask=None, training=None, initial_state=None): return super(ConvLSTM2D, self).call(inputs, mask=mask, training=training, initial_state=initial_state) @property def filters(self): return self.cell.filters @property def kernel_size(self): return self.cell.kernel_size @property def strides(self): return self.cell.strides @property def padding(self): return self.cell.padding @property def data_format(self): return self.cell.data_format @property def dilation_rate(self): return self.cell.dilation_rate @property def activation(self): return self.cell.activation @property def recurrent_activation(self): return self.cell.recurrent_activation @property def use_bias(self): return self.cell.use_bias @property def kernel_initializer(self): return self.cell.kernel_initializer @property def recurrent_initializer(self): return self.cell.recurrent_initializer @property def bias_initializer(self): return self.cell.bias_initializer @property def unit_forget_bias(self): return self.cell.unit_forget_bias @property def kernel_regularizer(self): return self.cell.kernel_regularizer @property def recurrent_regularizer(self): return self.cell.recurrent_regularizer @property def bias_regularizer(self): return self.cell.bias_regularizer @property def kernel_constraint(self): return self.cell.kernel_constraint @property def recurrent_constraint(self): return self.cell.recurrent_constraint @property def bias_constraint(self): 
return self.cell.bias_constraint @property def dropout(self): return self.cell.dropout @property def recurrent_dropout(self): return self.cell.recurrent_dropout def get_config(self): config = {'filters': self.filters, 'kernel_size': self.kernel_size, 'strides': self.strides, 'padding': self.padding, 'data_format': self.data_format, 'dilation_rate': self.dilation_rate, 'activation': activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), 'use_bias': self.use_bias, 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 'bias_initializer': initializers.serialize(self.bias_initializer), 'unit_forget_bias': self.unit_forget_bias, 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 'bias_constraint': constraints.serialize(self.bias_constraint), 'dropout': self.dropout, 'recurrent_dropout': self.recurrent_dropout} base_config = super(ConvLSTM2D, self).get_config() del base_config['cell'] return dict(list(base_config.items()) + list(config.items())) @classmethod def from_config(cls, config): return cls(**config) Keras-2.2.4/keras/models.py0000644000000000116100000002417513354530144015276 0ustar rooteng00000000000000"""Model-related utilities. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from . 
def _clone_functional_model(model, input_tensors=None):
    """Clone a functional `Model` instance.

    Model cloning is similar to calling a model on new inputs,
    except that it creates new layers (and thus new weights) instead
    of sharing the weights of the existing layers.

    # Arguments
        model: Instance of `Model`.
        input_tensors: optional list of input tensors
            to build the model upon. If not provided,
            placeholders will be created. Tensors that are not Keras
            tensors are wrapped in a new `Input`, matched by position
            with the inputs of `model`.

    # Returns
        An instance of `Model` reproducing the behavior
        of the original model, on top of new inputs tensors,
        using newly instantiated weights.

    # Raises
        ValueError: in case of invalid `model` argument value.
    """
    if not isinstance(model, Model):
        raise ValueError('Expected `model` argument '
                         'to be a `Model` instance, got ', model)
    if isinstance(model, Sequential):
        raise ValueError('Expected `model` argument '
                         'to be a functional `Model` instance, '
                         'got a `Sequential` instance instead:', model)

    layer_map = {}  # Cache for created layers.
    tensor_map = {}  # Map {reference_tensor: (corresponding_tensor, mask)}
    if input_tensors is None:
        # Create placeholders to build the model on top of.
        input_tensors = []
        for layer in model._input_layers:
            input_tensor = Input(batch_shape=layer.batch_input_shape,
                                 dtype=layer.dtype,
                                 sparse=layer.sparse,
                                 name=layer.name)
            input_tensors.append(input_tensor)
            # Cache newly created input layer.
            layer_map[layer] = input_tensor._keras_history[0]
    else:
        # Make sure that all input tensors come from a Keras layer.
        # If tensor comes from an input layer: cache the input layer.
        input_tensors = to_list(input_tensors)
        _input_tensors = []
        for i, x in enumerate(input_tensors):
            if not K.is_keras_tensor(x):
                # `x` is not a Keras tensor, so it carries no
                # `_keras_history`; the input layer it stands in for is
                # the one at the same position in the reference model.
                original_input_layer = model._input_layers[i]
                input_tensor = Input(
                    tensor=x,
                    name='input_wrapper_for_' + original_input_layer.name)
                _input_tensors.append(input_tensor)
                # Cache newly created input layer.
                newly_created_input_layer = input_tensor._keras_history[0]
                layer_map[original_input_layer] = newly_created_input_layer
            else:
                _input_tensors.append(x)
        input_tensors = _input_tensors

    for x, y in zip(model.inputs, input_tensors):
        tensor_map[x] = (y, None)  # tensor, mask

    # Iterate over every node in the reference model, in depth order.
    depth_keys = list(model._nodes_by_depth.keys())
    depth_keys.sort(reverse=True)
    for depth in depth_keys:
        nodes = model._nodes_by_depth[depth]
        for node in nodes:
            # Recover the corresponding layer.
            layer = node.outbound_layer

            # Get or create layer.
            if layer not in layer_map:
                # Clone layer.
                new_layer = layer.__class__.from_config(layer.get_config())
                layer_map[layer] = new_layer
                layer = new_layer
            else:
                # Reuse previously cloned layer.
                layer = layer_map[layer]
                # Don't call InputLayer multiple times.
                if isinstance(layer, InputLayer):
                    continue

            # Gather inputs to call the new layer.
            reference_input_tensors = node.input_tensors
            reference_output_tensors = node.output_tensors

            # If all previous input tensors are available in tensor_map,
            # then call node.inbound_layer on them.
            computed_data = []  # List of tuples (input, mask).
            for x in reference_input_tensors:
                if x in tensor_map:
                    computed_data.append(tensor_map[x])

            if len(computed_data) == len(reference_input_tensors):
                # Call layer. Work on a copy of the call arguments: a
                # `mask` entry may be added below, and that must not leak
                # into the node of the model being cloned.
                kwargs = dict(node.arguments) if node.arguments else {}
                if len(computed_data) == 1:
                    computed_tensor, computed_mask = computed_data[0]
                    if has_arg(layer.call, 'mask'):
                        if 'mask' not in kwargs:
                            kwargs['mask'] = computed_mask
                    output_tensors = to_list(
                        layer(computed_tensor, **kwargs))
                    output_masks = to_list(
                        layer.compute_mask(computed_tensor,
                                           computed_mask))
                    computed_tensors = [computed_tensor]
                    computed_masks = [computed_mask]
                else:
                    computed_tensors = [x[0] for x in computed_data]
                    computed_masks = [x[1] for x in computed_data]
                    if has_arg(layer.call, 'mask'):
                        if 'mask' not in kwargs:
                            kwargs['mask'] = computed_masks
                    output_tensors = to_list(
                        layer(computed_tensors, **kwargs))
                    output_masks = to_list(
                        layer.compute_mask(computed_tensors,
                                           computed_masks))
                # Update tensor_map.
                for x, y, mask in zip(reference_output_tensors,
                                      output_tensors,
                                      output_masks):
                    tensor_map[x] = (y, mask)

    # Check that we did compute the model outputs,
    # then instantiate a new model from inputs and outputs.
    output_tensors = []
    for x in model.outputs:
        assert x in tensor_map, 'Could not compute output ' + str(x)
        tensor, _ = tensor_map[x]
        output_tensors.append(tensor)
    return Model(input_tensors, output_tensors, name=model.name)
""" if not isinstance(model, Sequential): raise ValueError('Expected `model` argument ' 'to be a `Sequential` model instance, ' 'but got:', model) def clone(layer): return layer.__class__.from_config(layer.get_config()) layers = [clone(layer) for layer in model.layers] if input_tensors is None: return Sequential(layers=layers, name=model.name) else: if len(to_list(input_tensors)) != 1: raise ValueError('To clone a `Sequential` model, we expect ' ' at most one tensor ' 'as part of `input_tensors`.') x = to_list(input_tensors)[0] if K.is_keras_tensor(x): origin_layer = x._keras_history[0] if isinstance(origin_layer, InputLayer): return Sequential(layers=[origin_layer] + layers, name=model.name) else: raise ValueError('Cannot clone a `Sequential` model on top ' 'of a tensor that comes from a Keras layer ' 'other than an `InputLayer`. ' 'Use the functional API instead.') input_tensor = Input(tensor=x, name='input_wrapper_for_' + str(x.name)) input_layer = input_tensor._keras_history[0] return Sequential(layers=[input_layer] + layers, name=model.name) def clone_model(model, input_tensors=None): """Clone any `Model` instance. Model cloning is similar to calling a model on new inputs, except that it creates new layers (and thus new weights) instead of sharing the weights of the existing layers. # Arguments model: Instance of `Model` (could be a functional model or a Sequential model). input_tensors: optional list of input tensors to build the model upon. If not provided, placeholders will be created. # Returns An instance of `Model` reproducing the behavior of the original model, on top of new inputs tensors, using newly instantiated weights. # Raises ValueError: in case of invalid `model` argument value. 
def softmax(x, axis=-1):
    """Softmax activation function.

    # Arguments
        x: Input tensor.
        axis: Integer, axis along which the softmax normalization is applied.

    # Returns
        Tensor, output of softmax transformation.

    # Raises
        ValueError: In case `x` has fewer than 2 dimensions.
    """
    ndim = K.ndim(x)
    if ndim == 1:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
    if ndim is None or ndim < 2:
        raise ValueError('Cannot apply softmax to a tensor that has '
                         'fewer than 2 dimensions. '
                         'Received input: %s' % x)
    if ndim == 2 and axis in (-1, 1):
        # Normalizing a matrix over its last axis is what the backend
        # primitive computes when called without further arguments.
        return K.softmax(x)
    # General case, including a 2D input normalized over its first axis
    # (previously `axis` was silently ignored for 2D inputs). The maximum
    # is subtracted before exponentiating for numerical stability.
    e = K.exp(x - K.max(x, axis=axis, keepdims=True))
    s = K.sum(e, axis=axis, keepdims=True)
    return e / s
# Arguments x: A tensor or variable to compute the activation function for. # Returns The scaled exponential unit activation: `scale * elu(x, alpha)`. # Note - To be used together with the initialization "lecun_normal". - To be used together with the dropout variant "AlphaDropout". # References - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) """ alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 return scale * K.elu(x, alpha) def softplus(x): """Softplus activation function. # Arguments x: Input tensor. # Returns The softplus activation: `log(exp(x) + 1)`. """ return K.softplus(x) def softsign(x): """Softsign activation function. # Arguments x: Input tensor. # Returns The softplus activation: `x / (abs(x) + 1)`. """ return K.softsign(x) def relu(x, alpha=0., max_value=None, threshold=0.): """Rectified Linear Unit. With default values, it returns element-wise `max(x, 0)`. Otherwise, it follows: `f(x) = max_value` for `x >= max_value`, `f(x) = x` for `threshold <= x < max_value`, `f(x) = alpha * (x - threshold)` otherwise. # Arguments x: Input tensor. alpha: float. Slope of the negative part. Defaults to zero. max_value: float. Saturation threshold. threshold: float. Threshold value for thresholded activation. # Returns A tensor. """ return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold) def tanh(x): """Hyperbolic tangent activation function. """ return K.tanh(x) def sigmoid(x): """Sigmoid activation function. """ return K.sigmoid(x) def hard_sigmoid(x): """Hard sigmoid activation function. Faster to compute than sigmoid activation. # Arguments x: Input tensor. # Returns Hard sigmoid activation: - `0` if `x < -2.5` - `1` if `x > 2.5` - `0.2 * x + 0.5` if `-2.5 <= x <= 2.5`. """ return K.hard_sigmoid(x) def exponential(x): """Exponential (base e) activation function. """ return K.exp(x) def linear(x): """Linear (i.e. identity) activation function. 
def load_data(label_mode='fine'):
    """Loads CIFAR100 dataset.

    # Arguments
        label_mode: one of "fine", "coarse". Selects which label set of
            the archive is returned (`<label_mode>_labels`).

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to a column of shape `(num_samples, 1)`.

    # Raises
        ValueError: in case of invalid `label_mode`.
    """
    if label_mode not in ('fine', 'coarse'):
        raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

    root = get_file(
        'cifar-100-python',
        origin='https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz',
        untar=True)
    label_key = label_mode + '_labels'

    x_train, y_train = load_batch(os.path.join(root, 'train'),
                                  label_key=label_key)
    x_test, y_test = load_batch(os.path.join(root, 'test'),
                                label_key=label_key)

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    # Batches are read channels-first; move the channel axis last when
    # the backend is configured that way.
    channels_last = K.image_data_format() == 'channels_last'
    if channels_last:
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_test, y_test)
def load_data(path='imdb.npz', num_words=None, skip_top=0,
              maxlen=None, seed=113,
              start_char=1, oov_char=2, index_from=3, **kwargs):
    """Loads the IMDB dataset.

    # Arguments
        path: where to cache the data (relative to `~/.keras/datasets`).
        num_words: max number of words to include. Words are ranked
            by how often they occur (in the training set) and only
            the most frequent words are kept
        skip_top: skip the top N most frequently occurring words
            (which may not be informative).
        maxlen: sequences longer than this will be filtered out.
        seed: random seed for sample shuffling.
        start_char: The start of a sequence will be marked with this character.
            Set to 1 because 0 is usually the padding character.
        oov_char: words that were cut out because of the `num_words`
            or `skip_top` limit will be replaced with this character.
        index_from: index actual words with this index and higher.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        `x_train` and `x_test` are object arrays whose items are lists
        of word indices.

    # Raises
        ValueError: in case `maxlen` is so low
            that no input sequence could be kept.
        TypeError: in case of unrecognized keyword arguments.

    Note that the 'out of vocabulary' character is only used for
    words that were present in the training set but are not included
    because they're not making the `num_words` cut here.
    Words that were not seen in the training set but are in the test set
    have simply been skipped.
    """
    # Legacy support
    if 'nb_words' in kwargs:
        warnings.warn('The `nb_words` argument in `load_data` '
                      'has been renamed `num_words`.')
        num_words = kwargs.pop('nb_words')
    if kwargs:
        raise TypeError('Unrecognized keyword arguments: ' + str(kwargs))

    path = get_file(path,
                    origin='https://s3.amazonaws.com/text-datasets/imdb.npz',
                    file_hash='599dadb1135973df5b59232a0e9a887c')
    # The sequences are stored as object arrays (lists of differing
    # lengths), which NumPy only loads when pickling is explicitly
    # allowed. The archive is checked against `file_hash` by `get_file`.
    with np.load(path, allow_pickle=True) as f:
        x_train, labels_train = f['x_train'], f['y_train']
        x_test, labels_test = f['x_test'], f['y_test']

    np.random.seed(seed)
    indices = np.arange(len(x_train))
    np.random.shuffle(indices)
    x_train = x_train[indices]
    labels_train = labels_train[indices]

    indices = np.arange(len(x_test))
    np.random.shuffle(indices)
    x_test = x_test[indices]
    labels_test = labels_test[indices]

    xs = np.concatenate([x_train, x_test])
    labels = np.concatenate([labels_train, labels_test])

    if start_char is not None:
        xs = [[start_char] + [w + index_from for w in x] for x in xs]
    elif index_from:
        xs = [[w + index_from for w in x] for x in xs]

    if maxlen:
        xs, labels = _remove_long_seq(maxlen, xs, labels)
        if not xs:
            raise ValueError('After filtering for sequences shorter than maxlen=' +
                             str(maxlen) + ', no sequence was kept. '
                             'Increase maxlen.')
    if not num_words:
        # NOTE(review): the filter below uses `w < num_words`, so the
        # single highest index is mapped to `oov_char` (or dropped) when
        # `num_words` is left unset -- confirm whether that is intended.
        num_words = max([max(x) for x in xs])

    # by convention, use 2 as OOV word
    # reserve 'index_from' (=3 by default) characters:
    # 0 (padding), 1 (start), 2 (OOV)
    if oov_char is not None:
        xs = [[w if (skip_top <= w < num_words) else oov_char for w in x]
              for x in xs]
    else:
        xs = [[w for w in x if skip_top <= w < num_words]
              for x in xs]

    # The first len(x_train) samples are the (shuffled) training set.
    idx = len(x_train)
    # `dtype=object` keeps one Python list per sample; recent NumPy
    # versions refuse to build an array from ragged lists without it.
    x_train, y_train = np.array(xs[:idx], dtype=object), np.array(labels[:idx])
    x_test, y_test = np.array(xs[idx:], dtype=object), np.array(labels[idx:])

    return (x_train, y_train), (x_test, y_test)
def load_data(path='boston_housing.npz', test_split=0.2, seed=113):
    """Loads the Boston Housing dataset.

    # Arguments
        path: path where to cache the dataset locally
            (relative to ~/.keras/datasets).
        test_split: fraction of the data to reserve as test set;
            must satisfy `0 <= test_split < 1`.
        seed: Random seed for shuffling the data
            before computing the test split.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    assert 0 <= test_split < 1
    path = get_file(
        path,
        origin='https://s3.amazonaws.com/keras-datasets/boston_housing.npz',
        file_hash='f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5')
    with np.load(path) as archive:
        x, y = archive['x'], archive['y']

    # Shuffle features and targets with one shared permutation.
    np.random.seed(seed)
    order = np.arange(len(x))
    np.random.shuffle(order)
    x, y = x[order], y[order]

    # Everything before `split_at` is training data, the rest is test data.
    split_at = int(len(x) * (1 - test_split))
    train = (np.array(x[:split_at]), np.array(y[:split_at]))
    test = (np.array(x[split_at:]), np.array(y[split_at:]))
    return train, test
def load_data(path='reuters.npz', num_words=None, skip_top=0,
              maxlen=None, test_split=0.2, seed=113,
              start_char=1, oov_char=2, index_from=3, **kwargs):
    """Loads the Reuters newswire classification dataset.

    # Arguments
        path: where to cache the data (relative to `~/.keras/datasets`).
        num_words: max number of words to include. Words are ranked
            by how often they occur (in the training set) and only
            the most frequent words are kept
        skip_top: skip the top N most frequently occurring words
            (which may not be informative).
        maxlen: sequences longer than this will be filtered out.
        test_split: Fraction of the dataset to be used as test data.
        seed: random seed for sample shuffling.
        start_char: The start of a sequence will be marked with this character.
            Set to 1 because 0 is usually the padding character.
        oov_char: words that were cut out because of the `num_words`
            or `skip_top` limit will be replaced with this character.
        index_from: index actual words with this index and higher.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        `x_train` and `x_test` are object arrays whose items are lists
        of word indices.

    # Raises
        ValueError: in case `maxlen` is so low
            that no input sequence could be kept.
        TypeError: in case of unrecognized keyword arguments.

    Note that the 'out of vocabulary' character is only used for
    words that were present in the training set but are not included
    because they're not making the `num_words` cut here.
    Words that were not seen in the training set but are in the test set
    have simply been skipped.
    """
    # Legacy support
    if 'nb_words' in kwargs:
        warnings.warn('The `nb_words` argument in `load_data` '
                      'has been renamed `num_words`.')
        num_words = kwargs.pop('nb_words')
    if kwargs:
        raise TypeError('Unrecognized keyword arguments: ' + str(kwargs))

    path = get_file(path,
                    origin='https://s3.amazonaws.com/text-datasets/reuters.npz',
                    file_hash='87aedbeb0cb229e378797a632c1997b6')
    # The sequences are stored as object arrays (lists of differing
    # lengths), which NumPy only loads when pickling is explicitly
    # allowed. The archive is checked against `file_hash` by `get_file`.
    with np.load(path, allow_pickle=True) as f:
        xs, labels = f['x'], f['y']

    np.random.seed(seed)
    indices = np.arange(len(xs))
    np.random.shuffle(indices)
    xs = xs[indices]
    labels = labels[indices]

    if start_char is not None:
        xs = [[start_char] + [w + index_from for w in x] for x in xs]
    elif index_from:
        xs = [[w + index_from for w in x] for x in xs]

    if maxlen:
        xs, labels = _remove_long_seq(maxlen, xs, labels)
        # Same guard as the IMDB loader: without it an over-strict
        # `maxlen` surfaces as an opaque "max() arg is an empty sequence"
        # or as silently empty arrays.
        if not xs:
            raise ValueError('After filtering for sequences shorter than maxlen=' +
                             str(maxlen) + ', no sequence was kept. '
                             'Increase maxlen.')

    if not num_words:
        num_words = max([max(x) for x in xs])

    # by convention, use 2 as OOV word
    # reserve 'index_from' (=3 by default) characters:
    # 0 (padding), 1 (start), 2 (OOV)
    if oov_char is not None:
        xs = [[w if skip_top <= w < num_words else oov_char for w in x]
              for x in xs]
    else:
        xs = [[w for w in x if skip_top <= w < num_words]
              for x in xs]

    idx = int(len(xs) * (1 - test_split))
    # `dtype=object` keeps one Python list per sample; recent NumPy
    # versions refuse to build an array from ragged lists without it.
    x_train, y_train = np.array(xs[:idx], dtype=object), np.array(labels[:idx])
    x_test, y_test = np.array(xs[idx:], dtype=object), np.array(labels[idx:])

    return (x_train, y_train), (x_test, y_test)
def load_batch(fpath, label_key='labels'):
    """Internal utility for parsing CIFAR data.

    # Arguments
        fpath: path the file to parse.
        label_key: key for label data in the retrieve
            dictionary.

    # Returns
        A tuple `(data, labels)`, where `data` has been reshaped to
        `(num_samples, 3, 32, 32)`.
    """
    legacy_python = sys.version_info < (3,)
    with open(fpath, 'rb') as handle:
        if legacy_python:
            batch = cPickle.load(handle)
        else:
            # Keys are unpickled as bytes here; decode them so the
            # lookups below can use ordinary strings.
            raw = cPickle.load(handle, encoding='bytes')
            batch = {key.decode('utf8'): value for key, value in raw.items()}

    images = batch['data']
    labels = batch[label_key]
    return images.reshape(images.shape[0], 3, 32, 32), labels
def load_data(path='mnist.npz'):
    """Loads the MNIST dataset.

    # Arguments
        path: path where to cache the dataset locally
            (relative to ~/.keras/datasets).

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    path = get_file(path,
                    origin='https://s3.amazonaws.com/img-datasets/mnist.npz',
                    file_hash='8a61469f7ea1b51cbae51d4f78837e45')
    # Use the archive as a context manager so it is closed even when one
    # of the lookups below raises.
    with np.load(path) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']
    return (x_train, y_train), (x_test, y_test)
def generate_legacy_interface(allowed_positional_args=None,
                              conversions=None,
                              preprocessor=None,
                              value_conversions=None,
                              object_type='class'):
    """Build a decorator that translates legacy call arguments.

    # Arguments
        allowed_positional_args: optional list of argument names that may
            be passed positionally. When given, passing more positional
            arguments than listed (not counting the first one) raises
            a `TypeError`. When `None`, no such check is made.
        conversions: optional list of `(old_name, new_name)` pairs;
            a keyword passed under `old_name` is moved to `new_name`.
        preprocessor: optional callable `(args, kwargs)` returning
            `(args, kwargs, converted)`, run before any other conversion.
        value_conversions: optional mapping from keyword name to a
            `{old_value: new_value}` mapping applied to that keyword.
        object_type: `'class'` to name the object in messages after the
            class of the first positional argument; any other value uses
            the name of the decorated function.

    # Returns
        A decorator. The wrapped callable emits a warning showing the
        updated call whenever something was converted, and exposes the
        undecorated callable as `_original_function`.
    """
    check_positional_args = allowed_positional_args is not None
    allowed_positional_args = allowed_positional_args or []
    conversions = conversions or []
    value_conversions = value_conversions or []

    def _render(value):
        # Text shown for one argument value in the suggested call:
        # strings are quoted in full, arrays are abbreviated to 'array',
        # anything else is cut after 10 characters.
        if isinstance(value, six.string_types):
            return '"' + value + '"'
        text = 'array' if isinstance(value, np.ndarray) else str(value)
        if len(text) > 10:
            text = text[:10] + '...'
        return text

    def legacy_support(func):
        @six.wraps(func)
        def wrapper(*args, **kwargs):
            if object_type == 'class':
                object_name = args[0].__class__.__name__
            else:
                object_name = func.__name__

            converted = []
            if preprocessor:
                args, kwargs, converted = preprocessor(args, kwargs)

            # The first positional argument is not counted.
            if (check_positional_args and
                    len(args) > len(allowed_positional_args) + 1):
                raise TypeError('`' + object_name +
                                '` can accept only ' +
                                str(len(allowed_positional_args)) +
                                ' positional arguments ' +
                                str(tuple(allowed_positional_args)) +
                                ', but you passed the following '
                                'positional arguments: ' +
                                str(list(args[1:])))

            # Translate legacy values, still under their legacy names.
            for key in value_conversions:
                if key not in kwargs:
                    continue
                current = kwargs[key]
                table = value_conversions[key]
                if current in table:
                    kwargs[key] = table[current]

            # Move legacy keyword names to their new names.
            for old_name, new_name in conversions:
                if old_name not in kwargs:
                    continue
                value = kwargs.pop(old_name)
                if new_name in kwargs:
                    raise_duplicate_arg_error(old_name, new_name)
                kwargs[new_name] = value
                converted.append((new_name, old_name))

            if converted:
                pieces = [_render(value) for value in args[1:]]
                pieces.extend(name + '=' + _render(value)
                              for name, value in kwargs.items())
                signature = '`' + object_name + '(' + ', '.join(pieces) + ')`'
                warnings.warn('Update your `' + object_name + '` call to the ' +
                              'Keras 2 API: ' + signature, stacklevel=2)
            return func(*args, **kwargs)
        wrapper._original_function = func
        return wrapper
    return legacy_support
def recurrent_args_preprocessor(args, kwargs):
    """Translate legacy keyword arguments of recurrent layers.

    `forget_bias_init='one'` becomes `unit_forget_bias=True`; any other
    `forget_bias_init` value is dropped with a warning. `input_dim`
    (with the optional `input_length`) becomes
    `input_shape=(input_length, input_dim)`, also with a warning.

    # Arguments
        args: positional arguments, returned untouched.
        kwargs: keyword arguments dictionary, modified in place.

    # Returns
        A tuple `(args, kwargs, converted)` where `converted` lists the
        `(legacy_name, new_name)` pairs that were translated.
    """
    converted = []

    if 'forget_bias_init' in kwargs:
        if kwargs['forget_bias_init'] == 'one':
            del kwargs['forget_bias_init']
            kwargs['unit_forget_bias'] = True
            converted.append(('forget_bias_init', 'unit_forget_bias'))
        else:
            del kwargs['forget_bias_init']
            warnings.warn('The `forget_bias_init` argument '
                          'has been ignored. Use `unit_forget_bias=True` '
                          'instead to initialize with ones.', stacklevel=3)

    if 'input_dim' in kwargs:
        # A missing `input_length` leaves the first dimension as None.
        length = kwargs.pop('input_length', None)
        dim = kwargs.pop('input_dim')
        kwargs['input_shape'] = (length, dim)
        converted.append(('input_dim', 'input_shape'))
        warnings.warn('The `input_dim` and `input_length` arguments '
                      'in recurrent layers are deprecated. '
                      'Use `input_shape` instead.', stacklevel=3)

    return args, kwargs, converted
def conv2d_args_preprocessor(args, kwargs):
    """Folds the Keras 1 `nb_row`/`nb_col` pair into `kernel_size`.

    `args[0]` is the layer instance and `args[1]` the number of filters.
    The two kernel dimensions may arrive positionally, as keywords,
    or one of each.

    # Arguments
        args: positional arguments of the wrapped call.
        kwargs: dict of keyword arguments; modified in place.

    # Returns
        A tuple `(args, kwargs, converted)`. When a positional kernel
        size is produced, `args` is a new 3-element list; otherwise the
        object that was passed in is returned. `converted` holds one
        `('kernel_size', 'nb_row/nb_col')` entry if a rewrite happened.

    # Raises
        TypeError: if more than 3 positional arguments follow the
            layer instance.
        ValueError: if two integer positionals follow the filters and
            `padding`, `strides` or `data_format` is given as a keyword
            (the call cannot be told apart from a Keras 2 call passing
            `kernel_size` and `strides` positionally).
    """
    converted = []
    n_args = len(args)
    rewrite = ('kernel_size', 'nb_row/nb_col')

    if n_args > 4:
        raise TypeError('Layer can receive at most 3 positional arguments.')

    if n_args == 4:
        rows, cols = args[2], args[3]
        if isinstance(rows, int) and isinstance(cols, int):
            keras2_keywords = ('padding', 'strides', 'data_format')
            if any(kwd in kwargs for kwd in keras2_keywords):
                raise ValueError(
                    'It seems that you are using the Keras 2 '
                    'and you are passing both `kernel_size` and `strides` '
                    'as integer positional arguments. For safety reasons, '
                    'this is disallowed. Pass `strides` '
                    'as a keyword argument instead.')
            args = [args[0], args[1], (rows, cols)]
            converted.append(rewrite)
    elif n_args == 3:
        # Positional `nb_row` completed by a keyword `nb_col`.
        if isinstance(args[2], int) and 'nb_col' in kwargs:
            args = [args[0], args[1], (args[2], kwargs.pop('nb_col'))]
            converted.append(rewrite)
    elif n_args in (1, 2):
        if 'nb_row' in kwargs and 'nb_col' in kwargs:
            kernel_size = (kwargs.pop('nb_row'), kwargs.pop('nb_col'))
            if n_args == 2:
                # Filters were positional: keep the kernel positional too.
                args = [args[0], args[1], kernel_size]
            else:
                kwargs['kernel_size'] = kernel_size
            converted.append(rewrite)

    return args, kwargs, converted
def conv3d_args_preprocessor(args, kwargs):
    """Folds the Keras 1 `kernel_dim1/2/3` triple into `kernel_size`.

    `args[0]` is the layer instance and `args[1]` the number of filters.
    The three kernel dimensions may arrive positionally, as keywords,
    or as a mix (leading dimensions positional, trailing ones keywords).

    # Arguments
        args: positional arguments of the wrapped call.
        kwargs: dict of keyword arguments; modified in place.

    # Returns
        A tuple `(args, kwargs, converted)`. When a positional kernel
        size is produced, `args` is a new 3-element list; otherwise the
        object that was passed in is returned. `converted` holds one
        `('kernel_size', 'kernel_dim*')` entry if a rewrite happened.

    # Raises
        TypeError: if more than 4 positional arguments follow the
            layer instance.
        ValueError: if two integer positionals follow the filters and
            `padding`, `strides` or `data_format` is given as a keyword.
    """
    converted = []
    n_args = len(args)
    # Every branch reports the same rewrite. The single-argument branch
    # used to report the 2D label 'nb_row/nb_col' by mistake.
    rewrite = ('kernel_size', 'kernel_dim*')
    all_dims = ('kernel_dim1', 'kernel_dim2', 'kernel_dim3')

    if n_args > 5:
        raise TypeError('Layer can receive at most 4 positional arguments.')

    if n_args == 5:
        if all(isinstance(x, int) for x in args[2:5]):
            args = [args[0], args[1], (args[2], args[3], args[4])]
            converted.append(rewrite)
    elif n_args == 4 and isinstance(args[3], int):
        if isinstance(args[2], int):
            # Two ints could also be Keras 2 `kernel_size, strides`;
            # a Keras 2 keyword next to them makes the call ambiguous.
            keras2_keywords = ('padding', 'strides', 'data_format')
            if any(kwd in kwargs for kwd in keras2_keywords):
                raise ValueError(
                    'It seems that you are using the Keras 2 '
                    'and you are passing both `kernel_size` and `strides` '
                    'as integer positional arguments. For safety reasons, '
                    'this is disallowed. Pass `strides` '
                    'as a keyword argument instead.')
        if 'kernel_dim3' in kwargs:
            kernel_size = (args[2], args[3], kwargs.pop('kernel_dim3'))
            args = [args[0], args[1], kernel_size]
            converted.append(rewrite)
    elif n_args == 3:
        if all(name in kwargs for name in all_dims[1:]):
            kernel_size = (args[2],
                           kwargs.pop('kernel_dim2'),
                           kwargs.pop('kernel_dim3'))
            args = [args[0], args[1], kernel_size]
            converted.append(rewrite)
    elif n_args in (1, 2):
        if all(name in kwargs for name in all_dims):
            kernel_size = (kwargs.pop('kernel_dim1'),
                           kwargs.pop('kernel_dim2'),
                           kwargs.pop('kernel_dim3'))
            if n_args == 2:
                # Filters were positional: keep the kernel positional too.
                args = [args[0], args[1], kernel_size]
            else:
                kwargs['kernel_size'] = kernel_size
            converted.append(rewrite)

    return args, kwargs, converted
def _legacy_padding_dict_to_tuple(padding):
    """Converts a Keras 1 padding dict to `((top, bottom), (left, right))`.

    Returns None when the dict holds keys other than `top_pad`,
    `bottom_pad`, `left_pad` and `right_pad`; missing keys count as 0.
    """
    known_keys = {'top_pad', 'bottom_pad', 'left_pad', 'right_pad'}
    if not set(padding.keys()) <= known_keys:
        return None
    return ((padding.get('top_pad', 0), padding.get('bottom_pad', 0)),
            (padding.get('left_pad', 0), padding.get('right_pad', 0)))


def zeropadding2d_args_preprocessor(args, kwargs):
    """Replaces a Keras 1 dict `padding` with the nested-tuple form.

    The dict may be given as the `padding` keyword or as the single
    positional argument after the layer instance. A dict with unknown
    keys is left untouched and no warning is emitted.

    # Arguments
        args: positional arguments of the wrapped call.
        kwargs: dict of keyword arguments; modified in place when the
            keyword form is converted.

    # Returns
        A tuple `(args, kwargs, converted)`. `converted` is always
        empty; the rewrite is reported through a warning instead.
    """
    converted = []
    # One message for both branches. It advertises the nested format
    # that is actually produced below.
    message = ('The `padding` argument in the Keras 2 API no longer '
               'accepts dict types. You can now input argument as: '
               '`padding=((top_pad, bottom_pad), (left_pad, right_pad))`.')

    if 'padding' in kwargs and isinstance(kwargs['padding'], dict):
        padding = _legacy_padding_dict_to_tuple(kwargs['padding'])
        if padding is not None:
            kwargs['padding'] = padding
            # Warn from this frame so `stacklevel=3` keeps its target.
            warnings.warn(message, stacklevel=3)
    elif len(args) == 2 and isinstance(args[1], dict):
        padding = _legacy_padding_dict_to_tuple(args[1])
        if padding is not None:
            args = (args[0], padding)
            warnings.warn(message, stacklevel=3)

    return args, kwargs, converted
def generator_methods_args_preprocessor(args, kwargs):
    """Converts `samples_per_epoch` into `steps_per_epoch`.

    The conversion only runs when fewer than 3 positional arguments
    were given. The generator is read from `args[1]` if present,
    otherwise from `kwargs['generator']`. If it exposes a `batch_size`
    attribute, the sample count is floor-divided by it; otherwise the
    sample count is used unchanged as the step count.

    Independently, a warning about the changed semantics is emitted if
    any Keras 1 argument name is still present in `kwargs` afterwards.

    # Arguments
        args: positional arguments of the wrapped call (returned as is).
        kwargs: dict of keyword arguments; modified in place.

    # Returns
        A tuple `(args, kwargs, converted)`, `converted` holding
        `('samples_per_epoch', 'steps_per_epoch')` if the conversion ran.
    """
    converted = []

    if len(args) < 3 and 'samples_per_epoch' in kwargs:
        n_samples = kwargs.pop('samples_per_epoch')
        generator = args[1] if len(args) > 1 else kwargs['generator']
        if hasattr(generator, 'batch_size'):
            steps = n_samples // generator.batch_size
        else:
            steps = n_samples
        kwargs['steps_per_epoch'] = steps
        converted.append(('samples_per_epoch', 'steps_per_epoch'))

    legacy_names = ('samples_per_epoch', 'val_samples', 'nb_epoch',
                    'nb_val_samples', 'nb_worker')
    if any(name in kwargs for name in legacy_names):
        warnings.warn('The semantics of the Keras 2 argument '
                      '`steps_per_epoch` is not the same as the '
                      'Keras 1 argument `samples_per_epoch`. '
                      '`steps_per_epoch` is the number of batches '
                      'to draw from the generator at each epoch. '
                      'Basically steps_per_epoch = samples_per_epoch/batch_size. '
                      'Similarly `nb_val_samples`->`validation_steps` and '
                      '`val_samples`->`steps` arguments have changed. '
                      'Update your method calls accordingly.', stacklevel=3)

    return args, kwargs, converted
class MaxoutDense(Layer):
    """Maxout layer built from several parallel linear maps (deprecated).

    A `MaxoutDense` layer takes the element-wise maximum of
    `nb_feature` `Dense(input_dim, output_dim)` linear layers, which
    lets it learn a convex, piecewise linear activation function
    over its inputs.

    The layer itself is *linear*: no activation is applied. Add an
    `Activation` layer afterwards if one is wanted.

    Instantiating the layer emits a deprecation warning.

    # Arguments
        output_dim: int > 0.
        nb_feature: number of linear maps kept internally.
        init: initializer of the kernel, resolved through
            `initializers.get`. Only relevant if no `weights`
            argument is passed.
        weights: list of Numpy arrays to set as initial weights
            once the layer is built.
        W_regularizer: regularizer applied to the kernel
            (resolved through `regularizers.get`).
        b_regularizer: regularizer applied to the bias.
        activity_regularizer: regularizer applied to the layer output.
        W_constraint: constraint applied to the kernel
            (resolved through `constraints.get`).
        b_constraint: constraint applied to the bias.
        bias: whether to include a bias
            (i.e. make the layer affine rather than linear).
        input_dim: dimensionality of the input (integer). This argument
            (or alternatively, the keyword argument `input_shape`)
            is required when using this layer as the first layer
            in a model.

    # Input shape
        2D tensor with shape: `(nb_samples, input_dim)`.

    # Output shape
        2D tensor with shape: `(nb_samples, output_dim)`.

    # References
        - [Maxout Networks](http://arxiv.org/abs/1302.4389)
    """

    def __init__(self, output_dim,
                 nb_feature=4,
                 init='glorot_uniform',
                 weights=None,
                 W_regularizer=None,
                 b_regularizer=None,
                 activity_regularizer=None,
                 W_constraint=None,
                 b_constraint=None,
                 bias=True,
                 input_dim=None,
                 **kwargs):
        warnings.warn('The `MaxoutDense` layer is deprecated '
                      'and will be removed after 06/2017.')

        # Sizes.
        self.output_dim = output_dim
        self.nb_feature = nb_feature

        # Identifiers are resolved to objects once, at construction time.
        self.init = initializers.get(init)
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        # Kept until `build`, where the weights are applied.
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

        self.input_dim = input_dim
        if self.input_dim:
            # Lets the layer be used as the first layer of a model.
            kwargs['input_shape'] = (self.input_dim,)
        # Called last, after the attributes above are set.
        super(MaxoutDense, self).__init__(**kwargs)

    def build(self, input_shape):
        """Creates the kernel, the optional bias, and loads `weights`."""
        n_inputs = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(),
                                    shape=(None, n_inputs))

        kernel_shape = (self.nb_feature, n_inputs, self.output_dim)
        self.W = self.add_weight(kernel_shape,
                                 initializer=self.init,
                                 name='W',
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if not self.bias:
            self.b = None
        else:
            bias_shape = (self.nb_feature, self.output_dim,)
            self.b = self.add_weight(bias_shape,
                                     initializer='zero',
                                     name='b',
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            # The stored copy is not needed once the weights are set.
            del self.initial_weights
        self.built = True

    def compute_output_shape(self, input_shape):
        """Returns `(input_shape[0], output_dim)` for a 2D input shape."""
        assert input_shape and len(input_shape) == 2
        batch_size = input_shape[0]
        return (batch_size, self.output_dim)

    def call(self, x):
        """Applies the linear maps and reduces them with a maximum."""
        # No activation: the layer is purely linear before the max.
        projected = K.dot(x, self.W)
        if self.bias:
            projected += self.b
        # Axis 1 is presumably the `nb_feature` axis -- TODO confirm
        # against the backend's `dot` semantics for a 3D kernel.
        return K.max(projected, axis=1)

    def get_config(self):
        """Returns the base layer config extended with this layer's own."""
        own_config = {
            'output_dim': self.output_dim,
            'init': initializers.serialize(self.init),
            'nb_feature': self.nb_feature,
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'activity_regularizer':
                regularizers.serialize(self.activity_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
            'input_dim': self.input_dim,
        }
        merged = dict(super(MaxoutDense, self).get_config())
        # On a key clash the layer's own value wins.
        merged.update(own_config)
        return merged
b_constraint: instance of the [constraints](../constraints.md) module, applied to the bias. bias: whether to include a bias (i.e. make the layer affine rather than linear). input_dim: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model. # Input shape 2D tensor with shape: `(nb_samples, input_dim)`. # Output shape 2D tensor with shape: `(nb_samples, input_dim)`. # References - [Highway Networks](http://arxiv.org/abs/1505.00387v2) """ def __init__(self, init='glorot_uniform', activation=None, weights=None, W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None, bias=True, input_dim=None, **kwargs): warnings.warn('The `Highway` layer is deprecated ' 'and will be removed after 06/2017.') if 'transform_bias' in kwargs: kwargs.pop('transform_bias') warnings.warn('`transform_bias` argument is deprecated and ' 'has been removed.') self.init = initializers.get(init) self.activation = activations.get(activation) self.W_regularizer = regularizers.get(W_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) self.W_constraint = constraints.get(W_constraint) self.b_constraint = constraints.get(b_constraint) self.bias = bias self.initial_weights = weights self.input_spec = InputSpec(ndim=2) self.input_dim = input_dim if self.input_dim: kwargs['input_shape'] = (self.input_dim,) super(Highway, self).__init__(**kwargs) def build(self, input_shape): input_dim = input_shape[1] self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim)) self.W = self.add_weight((input_dim, input_dim), initializer=self.init, name='W', regularizer=self.W_regularizer, constraint=self.W_constraint) self.W_carry = self.add_weight((input_dim, input_dim), initializer=self.init, name='W_carry') if self.bias: self.b = self.add_weight((input_dim,), 
def AtrousConvolution1D(*args, **kwargs):
    """Deprecated factory returning a `Conv1D` layer.

    The legacy `atrous_rate` keyword is forwarded as `dilation_rate`.
    When `atrous_rate` is absent, `dilation_rate` defaults to 1 but a
    value supplied by the caller is kept (it used to be overwritten).

    # Arguments
        *args: positional arguments forwarded to `Conv1D`.
        **kwargs: keyword arguments forwarded to `Conv1D` after the
            `atrous_rate` translation.

    # Returns
        A `Conv1D` instance.
    """
    # Imported at call time, not at module load -- presumably to avoid
    # a circular import with the layers package (TODO confirm).
    from ..layers import Conv1D
    if 'atrous_rate' in kwargs:
        # The legacy spelling takes precedence, as before.
        kwargs['dilation_rate'] = kwargs.pop('atrous_rate')
    else:
        kwargs.setdefault('dilation_rate', 1)
    warnings.warn('The `AtrousConvolution1D` layer '
                  ' has been deprecated. Use instead '
                  'the `Conv1D` layer with the `dilation_rate` '
                  'argument.')
    return Conv1D(*args, **kwargs)


def AtrousConvolution2D(*args, **kwargs):
    """Deprecated factory returning a `Conv2D` layer.

    The legacy `atrous_rate` keyword is forwarded as `dilation_rate`.
    When `atrous_rate` is absent, `dilation_rate` defaults to 1 but a
    value supplied by the caller is kept (it used to be overwritten).

    # Arguments
        *args: positional arguments forwarded to `Conv2D`.
        **kwargs: keyword arguments forwarded to `Conv2D` after the
            `atrous_rate` translation.

    # Returns
        A `Conv2D` instance.
    """
    # Imported at call time, not at module load -- presumably to avoid
    # a circular import with the layers package (TODO confirm).
    from ..layers import Conv2D
    if 'atrous_rate' in kwargs:
        # The legacy spelling takes precedence, as before.
        kwargs['dilation_rate'] = kwargs.pop('atrous_rate')
    else:
        kwargs.setdefault('dilation_rate', 1)
    warnings.warn('The `AtrousConvolution2D` layer '
                  ' has been deprecated. Use instead '
                  'the `Conv2D` layer with the `dilation_rate` '
                  'argument.')
    return Conv2D(*args, **kwargs)
Do not use in a model -- it's not a valid layer! Use its children classes `LSTM`, `GRU` and `SimpleRNN` instead. All recurrent layers (`LSTM`, `GRU`, `SimpleRNN`) also follow the specifications of this class and accept the keyword arguments listed below. # Example ```python # as the first layer in a Sequential model model = Sequential() model.add(LSTM(32, input_shape=(10, 64))) # now model.output_shape == (None, 32) # note: `None` is the batch dimension. # for subsequent layers, no need to specify the input size: model.add(LSTM(16)) # to stack recurrent layers, you must use return_sequences=True # on any recurrent layer that feeds into another recurrent layer. # note that you only need to specify the input size on the first layer. model = Sequential() model.add(LSTM(64, input_dim=64, input_length=10, return_sequences=True)) model.add(LSTM(32, return_sequences=True)) model.add(LSTM(10)) ``` # Arguments weights: list of Numpy arrays to set as initial weights. The list should have 3 elements, of shapes: `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`. return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence. stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used. Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences. implementation: one of {0, 1, or 2}. If set to 0, the RNN will use an implementation that uses fewer, larger matrix products, thus running faster on CPU but consuming more memory. 
If set to 1, the RNN will use more matrix products, but smaller ones, thus running slower (may actually be faster on GPU) while consuming less memory. If set to 2 (LSTM/GRU only), the RNN will combine the input gate, the forget gate and the output gate into a single matrix, enabling more time-efficient parallelization on the GPU. Note: RNN dropout must be shared for all gates, resulting in a slightly reduced regularization. input_dim: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model. input_length: Length of input sequences, to be specified when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers upstream (without it, the shape of the dense outputs cannot be computed). Note that if the recurrent layer is not the first layer in your model, you would need to specify the input length at the level of the first layer (e.g. via the `input_shape` argument) # Input shapes 3D tensor with shape `(batch_size, timesteps, input_dim)`, (Optional) 2D tensors with shape `(batch_size, output_dim)`. # Output shape - if `return_state`: a list of tensors. The first tensor is the output. The remaining tensors are the last states, each with shape `(batch_size, units)`. - if `return_sequences`: 3D tensor with shape `(batch_size, timesteps, units)`. - else, 2D tensor with shape `(batch_size, units)`. # Masking This layer supports masking for input data with a variable number of timesteps. To introduce masks to your data, use an [Embedding](embeddings.md) layer with the `mask_zero` parameter set to `True`. # Note on using statefulness in RNNs You can set RNN layers to be 'stateful', which means that the states computed for the samples in one batch will be reused as initial states for the samples in the next batch. This assumes a one-to-one mapping between samples in different successive batches. 
To enable statefulness: - specify `stateful=True` in the layer constructor. - specify a fixed batch size for your model, by passing if sequential model: `batch_input_shape=(...)` to the first layer in your model. else for functional model with 1 or more Input layers: `batch_shape=(...)` to all the first layers in your model. This is the expected shape of your inputs *including the batch size*. It should be a tuple of integers, e.g. `(32, 10, 100)`. - specify `shuffle=False` when calling fit(). To reset the states of your model, call `.reset_states()` on either a specific layer, or on your entire model. # Note on specifying the initial state of RNNs You can specify the initial state of RNN layers symbolically by calling them with the keyword argument `initial_state`. The value of `initial_state` should be a tensor or list of tensors representing the initial state of the RNN layer. You can specify the initial state of RNN layers numerically by calling `reset_states` with the keyword argument `states`. The value of `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. 
""" def __init__(self, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False, implementation=0, **kwargs): super(Recurrent, self).__init__(**kwargs) self.return_sequences = return_sequences self.return_state = return_state self.go_backwards = go_backwards self.stateful = stateful self.unroll = unroll self.implementation = implementation self.supports_masking = True self.input_spec = [InputSpec(ndim=3)] self.state_spec = None self.dropout = 0 self.recurrent_dropout = 0 def compute_output_shape(self, input_shape): if isinstance(input_shape, list): input_shape = input_shape[0] if self.return_sequences: output_shape = (input_shape[0], input_shape[1], self.units) else: output_shape = (input_shape[0], self.units) if self.return_state: state_shape = [(input_shape[0], self.units) for _ in self.states] return [output_shape] + state_shape else: return output_shape def compute_mask(self, inputs, mask): if isinstance(mask, list): mask = mask[0] output_mask = mask if self.return_sequences else None if self.return_state: state_mask = [None for _ in self.states] return [output_mask] + state_mask else: return output_mask def step(self, inputs, states): raise NotImplementedError def get_constants(self, inputs, training=None): return [] def get_initial_state(self, inputs): # build an all-zero tensor of shape (samples, output_dim) initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) initial_state = K.expand_dims(initial_state) # (samples, 1) # (samples, output_dim) initial_state = K.tile(initial_state, [1, self.units]) initial_state = [initial_state for _ in range(len(self.states))] return initial_state def preprocess_input(self, inputs, training=None): return inputs def __call__(self, inputs, initial_state=None, **kwargs): # If there are multiple inputs, then # they should be the main input and `initial_state` # e.g. 
def call(self, inputs, mask=None, training=None, initial_state=None):
    """Runs the recurrent loop over `inputs`.

    # Arguments
        inputs: Input tensor, or a list whose first element is the input
            tensor and whose remaining elements are the initial states.
        mask: Optional mask tensor (or list; only the first entry is used).
        training: Optional training-phase flag. It is forwarded to
            `get_constants` and `preprocess_input`; `None` (the default)
            leaves the decision to those methods.
        initial_state: Optional list of initial state tensors. Ignored
            when `inputs` is a list.

    # Returns
        The full output sequence if `return_sequences` is set, else the
        last output. If `return_state` is set, a list made of that output
        followed by the final states.

    # Raises
        ValueError: if the number of initial states differs from the
            number of layer states, or if `unroll` is requested while the
            time dimension is undefined or equal to 1.
    """
    # input shape: `(samples, time (padded with zeros), input_dim)`
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if isinstance(inputs, list):
        initial_state = inputs[1:]
        inputs = inputs[0]
    elif initial_state is not None:
        pass
    elif self.stateful:
        initial_state = self.states
    else:
        initial_state = self.get_initial_state(inputs)

    if isinstance(mask, list):
        mask = mask[0]

    if len(initial_state) != len(self.states):
        raise ValueError('Layer has ' + str(len(self.states)) +
                         ' states but was passed ' +
                         str(len(initial_state)) +
                         ' initial states.')
    input_shape = K.int_shape(inputs)
    timesteps = input_shape[1]
    if self.unroll and timesteps in [None, 1]:
        raise ValueError('Cannot unroll a RNN if the '
                         'time dimension is undefined or equal to 1. \n'
                         '- If using a Sequential model, '
                         'specify the time dimension by passing '
                         'an `input_shape` or `batch_input_shape` '
                         'argument to your first layer. If your '
                         'first layer is an Embedding, you can '
                         'also use the `input_length` argument.\n'
                         '- If using the functional API, specify '
                         'the time dimension by passing a `shape` '
                         'or `batch_shape` argument to your Input layer.')

    # Forward the caller's `training` flag instead of discarding it;
    # with the default `training=None` this is identical to the old code.
    constants = self.get_constants(inputs, training=training)
    preprocessed_input = self.preprocess_input(inputs, training=training)
    last_output, outputs, states = K.rnn(self.step,
                                         preprocessed_input,
                                         initial_state,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         constants=constants,
                                         unroll=self.unroll,
                                         input_length=timesteps)
    if self.stateful:
        # Carry the final states over to the next batch.
        updates = [(self.states[i], states[i])
                   for i in range(len(states))]
        self.add_update(updates, inputs)

    # Properly set learning phase
    if 0 < self.dropout + self.recurrent_dropout:
        last_output._uses_learning_phase = True
        outputs._uses_learning_phase = True

    if self.return_sequences:
        output = outputs
    else:
        output = last_output

    if self.return_state:
        states = to_list(states, allow_tuple=True)
        return [output] + states
    else:
        return output
class ConvRecurrent2D(Recurrent):
    """Abstract base class for convolutional recurrent layers.

    Do not use in a model -- it's not a functional layer!

    # Arguments
        filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution).
        kernel_size: An integer or tuple/list of n integers, specifying the
            dimensions of the convolution window.
        strides: An integer or tuple/list of n integers,
            specifying the strides of the convolution.
            Specifying any stride value != 1 is incompatible with
            specifying any `dilation_rate` value != 1.
        padding: One of `"valid"` or `"same"` (case-insensitive).
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, time, ..., channels)`
            while `channels_first` corresponds to
            inputs with shape `(batch, time, channels, ...)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "channels_last".
        dilation_rate: An integer or tuple/list of n integers, specifying
            the dilation rate to use for dilated convolution.
            Currently, specifying any `dilation_rate` value != 1 is
            incompatible with specifying any `strides` value != 1.
        return_sequences: Boolean. Whether to return the last output
            in the output sequence, or the full sequence.
        go_backwards: Boolean (default False).
            If True, process the input sequence backwards.
        stateful: Boolean (default False). If True, the last state
            for each sample at index i in a batch will be used as initial
            state for the sample of index i in the following batch.

    # Input shape
        5D tensor with shape `(num_samples, timesteps, channels, rows, cols)`.

    # Output shape
        - if `return_sequences`: 5D tensor with shape
            `(num_samples, timesteps, channels, rows, cols)`.
        - else, 4D tensor with shape `(num_samples, channels, rows, cols)`.

    # Masking
        This layer supports masking for input data with a variable number
        of timesteps. To introduce masks to your data,
        use an [Embedding](embeddings.md) layer with the `mask_zero` parameter
        set to `True`.
        **Note:** for the time being, masking is only supported with Theano.

    # Note on using statefulness in RNNs
        You can set RNN layers to be 'stateful', which means that the states
        computed for the samples in one batch will be reused as initial states
        for the samples in the next batch.
        This assumes a one-to-one mapping between
        samples in different successive batches.

        To enable statefulness:
            - specify `stateful=True` in the layer constructor.
            - specify a fixed batch size for your model, by passing
                a `batch_input_shape=(...)` to the first layer in your model.
                This is the expected shape of your inputs
                *including the batch size*.
                It should be a tuple of integers, e.g. `(32, 10, 100)`.

        To reset the states of your model, call `.reset_states()` on either
        a specific layer, or on your entire model.
    """

    def __init__(self, filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format=None,
                 dilation_rate=(1, 1),
                 return_sequences=False,
                 go_backwards=False,
                 stateful=False,
                 **kwargs):
        super(ConvRecurrent2D, self).__init__(**kwargs)
        # Convolution hyper-parameters, normalized to canonical forms.
        self.filters = filters
        self.kernel_size = conv_utils.normalize_tuple(
            kernel_size, 2, 'kernel_size')
        self.strides = conv_utils.normalize_tuple(strides, 2, 'strides')
        self.padding = conv_utils.normalize_padding(padding)
        self.data_format = K.normalize_data_format(data_format)
        self.dilation_rate = conv_utils.normalize_tuple(
            dilation_rate, 2, 'dilation_rate')
        # Recurrent flags override whatever the parent constructor set.
        self.return_sequences = return_sequences
        self.go_backwards = go_backwards
        self.stateful = stateful
        # Inputs are 5D: (samples, time, + three image dimensions).
        self.input_spec = [InputSpec(ndim=5)]
        self.state_spec = None

    def compute_output_shape(self, input_shape):
        """Computes the output shape (plus state shapes if `return_state`)."""
        if isinstance(input_shape, list):
            input_shape = input_shape[0]
        batch = input_shape[0]

        # Locate the two spatial axes for the configured data format.
        if self.data_format == 'channels_first':
            row_axis, col_axis = 3, 4
        elif self.data_format == 'channels_last':
            row_axis, col_axis = 2, 3
        rows = input_shape[row_axis]
        cols = input_shape[col_axis]

        rows = conv_utils.conv_output_length(rows,
                                             self.kernel_size[0],
                                             padding=self.padding,
                                             stride=self.strides[0],
                                             dilation=self.dilation_rate[0])
        cols = conv_utils.conv_output_length(cols,
                                             self.kernel_size[1],
                                             padding=self.padding,
                                             stride=self.strides[1],
                                             dilation=self.dilation_rate[1])

        # Shape of a single timestep's output; states share this shape.
        if self.data_format == 'channels_first':
            frame_shape = (batch, self.filters, rows, cols)
        elif self.data_format == 'channels_last':
            frame_shape = (batch, rows, cols, self.filters)

        if self.return_sequences:
            # Re-insert the time axis right after the batch axis.
            output_shape = (batch, input_shape[1]) + frame_shape[1:]
        else:
            output_shape = frame_shape

        if self.return_state:
            output_shape = [output_shape, frame_shape, frame_shape]
        return output_shape

    def get_config(self):
        """Returns the layer config, extending the parent's config."""
        merged = dict(super(ConvRecurrent2D, self).get_config())
        merged.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'strides': self.strides,
            'padding': self.padding,
            'data_format': self.data_format,
            'dilation_rate': self.dilation_rate,
            'return_sequences': self.return_sequences,
            'go_backwards': self.go_backwards,
            'stateful': self.stateful,
        })
        return merged
def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
               input_data=None, expected_output=None,
               expected_output_dtype=None, fixed_batch_size=False):
    """Test routine for a layer with a single input tensor
    and single output tensor.

    # Arguments
        layer_cls: Layer class to instantiate and exercise.
        kwargs: Optional dict of constructor keyword arguments.
            It is copied, so the caller's dict is never modified.
        input_shape: Shape of the input, batch dimension included;
            `None` entries are replaced by a random size in [1, 3].
            Required when `input_data` is not given.
        input_dtype: Dtype of the generated input
            (defaults to `K.floatx()`).
        input_data: Optional input array; when given, missing
            `input_shape` / `input_dtype` are taken from it.
        expected_output: Optional array the prediction must match
            (`rtol=1e-3`).
        expected_output_dtype: Expected dtype of the symbolic output
            (defaults to `input_dtype`).
        fixed_batch_size: If True, build the `Input` with the full
            `batch_shape` instead of dropping the batch dimension.

    # Returns
        The array predicted by the functional model wrapping the layer.
    """
    # The original default `kwargs={}` was mutated below
    # (`kwargs['weights'] = ...`), leaking weights between unrelated calls.
    kwargs = dict(kwargs) if kwargs else {}

    # generate input data
    if input_data is None:
        assert input_shape
        if not input_dtype:
            input_dtype = K.floatx()
        input_data_shape = list(input_shape)
        for i, e in enumerate(input_data_shape):
            if e is None:
                input_data_shape[i] = np.random.randint(1, 4)
        input_data = (10 * np.random.random(input_data_shape))
        input_data = input_data.astype(input_dtype)
    else:
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    # test and instantiation from weights
    # Checking for empty weights array to avoid a problem where some
    # legacy layers return bad values from get_weights()
    if has_arg(layer_cls.__init__, 'weights') and len(weights):
        kwargs['weights'] = weights
        layer = layer_cls(**kwargs)

    expected_output_shape = layer.compute_output_shape(input_shape)

    def _layer_in_model_test(model):
        actual_output = model.predict(input_data)
        actual_output_shape = actual_output.shape
        for expected_dim, actual_dim in zip(expected_output_shape,
                                            actual_output_shape):
            if expected_dim is not None:
                assert expected_dim == actual_dim
        if expected_output is not None:
            assert_allclose(actual_output, expected_output, rtol=1e-3)

        # test serialization, weight setting at model level
        model_config = model.get_config()
        recovered_model = model.__class__.from_config(model_config)
        if model.weights:
            weights = model.get_weights()
            recovered_model.set_weights(weights)
            _output = recovered_model.predict(input_data)
            assert_allclose(_output, actual_output, rtol=1e-3)

        # test training mode (e.g. useful when the layer has a
        # different behavior at training and testing time).
        if has_arg(layer.call, 'training'):
            model.compile('rmsprop', 'mse')
            model.train_on_batch(input_data, actual_output)
        return actual_output

    # test in functional API
    if fixed_batch_size:
        x = Input(batch_shape=input_shape, dtype=input_dtype)
    else:
        x = Input(shape=input_shape[1:], dtype=input_dtype)
    y = layer(x)
    assert K.dtype(y) == expected_output_dtype

    # check with the functional API
    model = Model(x, y)
    actual_output = _layer_in_model_test(model)

    # test instantiation from layer config
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function
    return actual_output
""" if print_fn is None: print_fn = print if model.__class__.__name__ == 'Sequential': sequential_like = True elif not model._is_graph_network: # We treat subclassed models as a simple sequence of layers, # for logging purposes. sequential_like = True else: sequential_like = True nodes_by_depth = model._nodes_by_depth.values() nodes = [] for v in nodes_by_depth: if (len(v) > 1) or (len(v) == 1 and len(v[0].inbound_layers) > 1): # if the model has multiple nodes # or if the nodes have multiple inbound_layers # the model is no longer sequential sequential_like = False break nodes += v if sequential_like: # search for shared layers for layer in model.layers: flag = False for node in layer._inbound_nodes: if node in nodes: if flag: sequential_like = False break else: flag = True if not sequential_like: break if sequential_like: line_length = line_length or 65 positions = positions or [.45, .85, 1.] if positions[-1] <= 1: positions = [int(line_length * p) for p in positions] # header names for the different log elements to_display = ['Layer (type)', 'Output Shape', 'Param #'] else: line_length = line_length or 98 positions = positions or [.33, .55, .67, 1.] 
if positions[-1] <= 1: positions = [int(line_length * p) for p in positions] # header names for the different log elements to_display = ['Layer (type)', 'Output Shape', 'Param #', 'Connected to'] relevant_nodes = [] for v in model._nodes_by_depth.values(): relevant_nodes += v def print_row(fields, positions): line = '' for i in range(len(fields)): if i > 0: line = line[:-1] + ' ' line += str(fields[i]) line = line[:positions[i]] line += ' ' * (positions[i] - len(line)) print_fn(line) print_fn('_' * line_length) print_row(to_display, positions) print_fn('=' * line_length) def print_layer_summary(layer): try: output_shape = layer.output_shape except AttributeError: output_shape = 'multiple' name = layer.name cls_name = layer.__class__.__name__ fields = [name + ' (' + cls_name + ')', output_shape, layer.count_params()] print_row(fields, positions) def print_layer_summary_with_connections(layer): """Prints a summary for a single layer. # Arguments layer: target layer. """ try: output_shape = layer.output_shape except AttributeError: output_shape = 'multiple' connections = [] for node in layer._inbound_nodes: if relevant_nodes and node not in relevant_nodes: # node is not part of the current network continue for i in range(len(node.inbound_layers)): inbound_layer = node.inbound_layers[i].name inbound_node_index = node.node_indices[i] inbound_tensor_index = node.tensor_indices[i] connections.append(inbound_layer + '[' + str(inbound_node_index) + '][' + str(inbound_tensor_index) + ']') name = layer.name cls_name = layer.__class__.__name__ if not connections: first_connection = '' else: first_connection = connections[0] fields = [name + ' (' + cls_name + ')', output_shape, layer.count_params(), first_connection] print_row(fields, positions) if len(connections) > 1: for i in range(1, len(connections)): fields = ['', '', '', connections[i]] print_row(fields, positions) layers = model.layers for i in range(len(layers)): if sequential_like: print_layer_summary(layers[i]) else: 
def convert_all_kernels_in_model(model):
    """Converts all convolution kernels in a model from Theano to TensorFlow.

    Also works from TensorFlow to Theano.

    # Arguments
        model: target model for the conversion.
    """
    # Note: SeparableConvolution not included
    # since only supported by TF.
    convertible = ('Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose')
    # Pair every convolution kernel variable with its converted value,
    # then write them all back in one batched assignment.
    assignments = [
        (conv.kernel, convert_kernel(K.get_value(conv.kernel)))
        for conv in model.layers
        if conv.__class__.__name__ in convertible
    ]
    K.batch_set_value(assignments)
def get_source_inputs(tensor, layer=None, node_index=None):
    """Returns the list of input tensors necessary to compute `tensor`.

    For a tensor that carries `_keras_history` the result is a list of
    tensors (potentially with 1 element). A tensor without
    `_keras_history` is returned unchanged, not wrapped in a list.

    # Arguments
        tensor: The tensor to start from.
        layer: Origin layer of the tensor. Will be
            determined via tensor._keras_history if not provided.
        node_index: Origin node index of the tensor. Will be determined
            via tensor._keras_history if not provided.

    # Returns
        List of input tensors.
    """
    if not hasattr(tensor, '_keras_history'):
        return tensor

    # Use the tensor's own history unless BOTH origin hints were given.
    # (The previous truthiness test `or node_index` let
    # `layer=<given>, node_index=None` through and then indexed
    # `_inbound_nodes[None]`.)
    if layer is None or node_index is None:
        layer, node_index, _ = tensor._keras_history
    if not layer._inbound_nodes:
        return [tensor]

    node = layer._inbound_nodes[node_index]
    if not node.inbound_layers:
        # Reached an Input layer, stop recursion.
        return node.input_tensors

    source_tensors = []
    for i in range(len(node.inbound_layers)):
        previous_sources = get_source_inputs(node.input_tensors[i],
                                             node.inbound_layers[i],
                                             node.node_indices[i])
        # Avoid input redundancy.
        for source in previous_sources:
            if source not in source_tensors:
                source_tensors.append(source)
    return source_tensors
def deserialize_keras_object(identifier, module_objects=None,
                             custom_objects=None,
                             printable_module_name='object'):
    """Turns a serialized identifier back into a Keras object.

    # Arguments
        identifier: Either a config dict with `class_name` and `config`
            keys, or the string name of an object.
        module_objects: Optional dict of built-in names to objects,
            consulted after `custom_objects` and the global custom
            objects.
        custom_objects: Optional dict of user-provided names to objects;
            takes precedence over every other lookup.
        printable_module_name: Label used in error messages.

    # Returns
        For a config dict: the result of `cls.from_config(...)`, or of
        `cls(**config)` when `cls` has no `from_config`.
        For a string: the object registered under that name.

    # Raises
        ValueError: if the config dict is malformed, the name is unknown,
            or `identifier` is neither a dict nor a string.
    """
    if isinstance(identifier, dict):
        # In this case we are dealing with a Keras config dictionary.
        config = identifier
        if 'class_name' not in config or 'config' not in config:
            raise ValueError('Improper config format: ' + str(config))
        class_name = config['class_name']
        if custom_objects and class_name in custom_objects:
            cls = custom_objects[class_name]
        elif class_name in _GLOBAL_CUSTOM_OBJECTS:
            cls = _GLOBAL_CUSTOM_OBJECTS[class_name]
        else:
            module_objects = module_objects or {}
            cls = module_objects.get(class_name)
            if cls is None:
                raise ValueError('Unknown ' + printable_module_name +
                                 ': ' + class_name)
        if hasattr(cls, 'from_config'):
            custom_objects = custom_objects or {}
            if has_arg(cls.from_config, 'custom_objects'):
                # Explicit custom objects win over the global ones.
                return cls.from_config(
                    config['config'],
                    custom_objects=dict(list(_GLOBAL_CUSTOM_OBJECTS.items()) +
                                        list(custom_objects.items())))
            with CustomObjectScope(custom_objects):
                return cls.from_config(config['config'])
        else:
            # Then `cls` may be a function returning a class.
            # in this case by convention `config` holds
            # the kwargs of the function.
            custom_objects = custom_objects or {}
            with CustomObjectScope(custom_objects):
                return cls(**config['config'])
    elif isinstance(identifier, six.string_types):
        function_name = identifier
        if custom_objects and function_name in custom_objects:
            fn = custom_objects.get(function_name)
        elif function_name in _GLOBAL_CUSTOM_OBJECTS:
            fn = _GLOBAL_CUSTOM_OBJECTS[function_name]
        else:
            # `module_objects` defaults to None; without this guard an
            # unknown name raised AttributeError instead of ValueError.
            module_objects = module_objects or {}
            fn = module_objects.get(function_name)
            if fn is None:
                raise ValueError('Unknown ' + printable_module_name +
                                 ':' + function_name)
        return fn
    else:
        # `identifier` is not a string here, so it must be converted
        # before concatenation or a TypeError masks the real error.
        raise ValueError('Could not interpret serialized ' +
                         printable_module_name + ': ' + str(identifier))
""" raw_code = marshal.dumps(func.__code__) code = codecs.encode(raw_code, 'base64').decode('ascii') defaults = func.__defaults__ if func.__closure__: closure = tuple(c.cell_contents for c in func.__closure__) else: closure = None return code, defaults, closure def func_load(code, defaults=None, closure=None, globs=None): """Deserializes a user defined function. # Arguments code: bytecode of the function. defaults: defaults of the function. closure: closure of the function. globs: dictionary of global objects. # Returns A function object. """ if isinstance(code, (tuple, list)): # unpack previous dump code, defaults, closure = code if isinstance(defaults, list): defaults = tuple(defaults) def ensure_value_to_cell(value): """Ensures that a value is converted to a python cell object. # Arguments value: Any value that needs to be casted to the cell type # Returns A value wrapped as a cell object (see function "func_load") """ def dummy_fn(): value # just access it so it gets captured in .__closure__ cell_value = dummy_fn.__closure__[0] if not isinstance(value, type(cell_value)): return cell_value else: return value if closure is not None: closure = tuple(ensure_value_to_cell(_) for _ in closure) try: raw_code = codecs.decode(code.encode('ascii'), 'base64') code = marshal.loads(raw_code) except (UnicodeEncodeError, binascii.Error, ValueError): # backwards compatibility for models serialized prior to 2.1.2 raw_code = code.encode('raw_unicode_escape') code = marshal.loads(raw_code) if globs is None: globs = globals() return python_types.FunctionType(code, globs, name=code.co_name, argdefs=defaults, closure=closure) def has_arg(fn, name, accept_all=False): """Checks if a callable accepts a given keyword argument. For Python 2, checks if there is an argument with the given name. For Python 3, checks if there is an argument with the given name, and also whether this argument can be called with a keyword (i.e. if it is not a positional-only argument). 
class Progbar(object):
    """Displays a progress bar.

    # Arguments
        target: Total number of steps expected, None if unknown.
        width: Progress bar width on screen.
        verbose: Verbosity mode, 0 (silent), 1 (verbose), 2 (semi-verbose)
        stateful_metrics: Iterable of string names of metrics that
            should *not* be averaged over time. Metrics in this list
            will be displayed as-is. All others will be averaged
            by the progbar before display.
        interval: Minimum visual progress update interval (in seconds).
    """

    def __init__(self, target, width=30, verbose=1, interval=0.05,
                 stateful_metrics=None):
        self.target = target
        self.width = width
        self.verbose = verbose
        self.interval = interval
        if stateful_metrics:
            self.stateful_metrics = set(stateful_metrics)
        else:
            self.stateful_metrics = set()

        # In-place redrawing is only used on a tty or inside a notebook
        # kernel; otherwise every update is written on a new line.
        self._dynamic_display = ((hasattr(sys.stdout, 'isatty') and
                                  sys.stdout.isatty()) or
                                 'ipykernel' in sys.modules)
        self._total_width = 0
        self._seen_so_far = 0
        # Maps metric name -> [weighted sum, number of steps].
        self._values = collections.OrderedDict()
        self._start = time.time()
        self._last_update = 0

    def update(self, current, values=None):
        """Updates the progress bar.

        # Arguments
            current: Index of current step.
            values: List of tuples:
                `(name, value_for_last_step)`.
                If `name` is in `stateful_metrics`,
                `value_for_last_step` will be displayed as-is.
                Else, an average of the metric over time will be displayed.
        """
        values = values or []
        for k, v in values:
            if k not in self.stateful_metrics:
                # Weight the value by the number of steps it covers.
                if k not in self._values:
                    self._values[k] = [v * (current - self._seen_so_far),
                                       current - self._seen_so_far]
                else:
                    self._values[k][0] += v * (current - self._seen_so_far)
                    self._values[k][1] += (current - self._seen_so_far)
            else:
                # Stateful metrics output a numeric value. This representation
                # means "take an average from a single value" but keeps the
                # numeric formatting.
                self._values[k] = [v, 1]
        self._seen_so_far = current

        now = time.time()
        info = ' - %.0fs' % (now - self._start)
        if self.verbose == 1:
            # Throttle redraws, but never skip the final update.
            if (now - self._last_update < self.interval and
                    self.target is not None and current < self.target):
                return

            prev_total_width = self._total_width
            if self._dynamic_display:
                sys.stdout.write('\b' * prev_total_width)
                sys.stdout.write('\r')
            else:
                sys.stdout.write('\n')

            if self.target is not None:
                numdigits = int(np.floor(np.log10(self.target))) + 1
                barstr = '%%%dd/%d [' % (numdigits, self.target)
                bar = barstr % current
                prog = float(current) / self.target
                prog_width = int(self.width * prog)
                if prog_width > 0:
                    bar += ('=' * (prog_width - 1))
                    if current < self.target:
                        bar += '>'
                    else:
                        bar += '='
                bar += ('.' * (self.width - prog_width))
                bar += ']'
            else:
                bar = '%7d/Unknown' % current

            self._total_width = len(bar)
            sys.stdout.write(bar)

            if current:
                time_per_unit = (now - self._start) / current
            else:
                time_per_unit = 0
            if self.target is not None and current < self.target:
                eta = time_per_unit * (self.target - current)
                if eta > 3600:
                    eta_format = ('%d:%02d:%02d' %
                                  (eta // 3600, (eta % 3600) // 60, eta % 60))
                elif eta > 60:
                    eta_format = '%d:%02d' % (eta // 60, eta % 60)
                else:
                    eta_format = '%ds' % eta

                info = ' - ETA: %s' % eta_format
            else:
                if time_per_unit >= 1:
                    info += ' %.0fs/step' % time_per_unit
                elif time_per_unit >= 1e-3:
                    info += ' %.0fms/step' % (time_per_unit * 1e3)
                else:
                    info += ' %.0fus/step' % (time_per_unit * 1e6)

            for k in self._values:
                info += ' - %s:' % k
                if isinstance(self._values[k], list):
                    avg = np.mean(
                        self._values[k][0] / max(1, self._values[k][1]))
                    if abs(avg) > 1e-3:
                        info += ' %.4f' % avg
                    else:
                        info += ' %.4e' % avg
                else:
                    info += ' %s' % self._values[k]

            self._total_width += len(info)
            if prev_total_width > self._total_width:
                # Pad with blanks to erase leftovers of a longer previous line.
                info += (' ' * (prev_total_width - self._total_width))

            if self.target is not None and current >= self.target:
                info += '\n'

            sys.stdout.write(info)
            sys.stdout.flush()

        elif self.verbose == 2:
            # Semi-verbose mode prints a single summary line at the end.
            if self.target is None or current >= self.target:
                for k in self._values:
                    info += ' - %s:' % k
                    avg = np.mean(
                        self._values[k][0] / max(1, self._values[k][1]))
                    # Compare the magnitude, as the verbose == 1 branch does,
                    # so negative averages are not forced into scientific
                    # notation.
                    if abs(avg) > 1e-3:
                        info += ' %.4f' % avg
                    else:
                        info += ' %.4e' % avg
                info += '\n'

                sys.stdout.write(info)
                sys.stdout.flush()

        self._last_update = now

    def add(self, n, values=None):
        """Advances the progress bar by `n` steps.

        # Arguments
            n: Number of steps to add to the current position.
            values: Same as the `values` argument of `update`.
        """
        self.update(self._seen_so_far + n, values)


def to_list(x, allow_tuple=False):
    """Normalizes a list/tensor into a list.

    If a tensor is passed, we return
    a list of size 1 containing the tensor.

    # Arguments
        x: target object to be normalized.
        allow_tuple: If False and x is a tuple,
            it will be converted into a list
            with a single element (the tuple).
            Else converts the tuple to a list.

    # Returns
        A list.
    """
    if isinstance(x, list):
        return x
    if allow_tuple and isinstance(x, tuple):
        return list(x)
    return [x]


def unpack_singleton(x):
    """Gets the first element if the iterable has only one value.

    Otherwise return the iterable.

    # Argument:
        x: A list or tuple.

    # Returns:
        The same iterable or the first element.
    """
    if len(x) == 1:
        return x[0]
    return x


def object_list_uid(object_list):
    """Builds a string identifier from the `id()` of each object.

    # Arguments
        object_list: A single object or a list of objects.

    # Returns
        A string with the comma-separated absolute ids of the objects.
    """
    object_list = to_list(object_list)
    return ', '.join([str(abs(id(x))) for x in object_list])


def is_all_none(iterable_or_element):
    """Checks whether a value, or every element of a list/tuple, is `None`.

    # Arguments
        iterable_or_element: A single element, or a list or tuple of them.

    # Returns
        bool, `True` if no element differs from `None`.
    """
    iterable = to_list(iterable_or_element, allow_tuple=True)
    for element in iterable:
        if element is not None:
            return False
    return True


def slice_arrays(arrays, start=None, stop=None):
    """Slices an array or list of arrays.

    This takes an array-like, or a list of
    array-likes, and outputs:
        - arrays[start:stop] if `arrays` is an array-like
        - [x[start:stop] for x in arrays] if `arrays` is a list

    Can also work on list/array of indices: `_slice_arrays(x, indices)`

    # Arguments
        arrays: Single array or list of arrays.
        start: can be an integer index (start index)
            or a list/array of indices
        stop: integer (stop index); should be None if
            `start` was a list.

    # Returns
        A slice of the array(s).
    """
    if arrays is None:
        return [None]
    elif isinstance(arrays, list):
        if hasattr(start, '__len__'):
            # hdf5 datasets only support list objects as indices
            if hasattr(start, 'shape'):
                start = start.tolist()
            return [None if x is None else x[start] for x in arrays]
        else:
            return [None if x is None else x[start:stop] for x in arrays]
    else:
        if hasattr(start, '__len__'):
            if hasattr(start, 'shape'):
                start = start.tolist()
            return arrays[start]
        # NOTE(review): the check below inspects `start`, not `arrays`, so a
        # plain integer `start` with a single array falls through to `[None]`.
        # Kept as-is because callers may rely on it; confirm the intent.
        elif hasattr(start, '__getitem__'):
            return arrays[start:stop]
        else:
            return [None]


def transpose_shape(shape, target_format, spatial_axes):
    """Converts a tuple or a list to the correct `data_format`.

    It does so by switching the positions of its elements.

    # Arguments
        shape: Tuple or list, often representing shape,
            corresponding to `'channels_last'`.
        target_format: A string, either `'channels_first'` or `'channels_last'`.
        spatial_axes: A tuple of integers.
            Correspond to the indexes of the spatial axes.
            For example, if you pass a shape
            representing (batch_size, timesteps, rows, cols, channels),
            then `spatial_axes=(2, 3)`.

    # Returns
        A tuple or list, with the elements permuted according
        to `target_format`.

    # Example
    ```python
        >>> from keras.utils.generic_utils import transpose_shape
        >>> transpose_shape((16, 128, 128, 32),'channels_first', spatial_axes=(1, 2))
        (16, 32, 128, 128)
        >>> transpose_shape((16, 128, 128, 32), 'channels_last', spatial_axes=(1, 2))
        (16, 128, 128, 32)
        >>> transpose_shape((128, 128, 32), 'channels_first', spatial_axes=(0, 1))
        (32, 128, 128)
    ```

    # Raises
        ValueError: if `target_format` is neither `'channels_first'`
            nor `'channels_last'`.
    """
    if target_format == 'channels_first':
        # Leading (non-spatial) axes, then channels, then the spatial axes.
        new_values = shape[:spatial_axes[0]]
        new_values += (shape[-1],)
        new_values += tuple(shape[x] for x in spatial_axes)

        if isinstance(shape, list):
            return list(new_values)
        return new_values
    elif target_format == 'channels_last':
        return shape
    else:
        raise ValueError('The `data_format` argument must be one of '
                         '"channels_first", "channels_last". Received: ' +
                         str(target_format))
def normalize_tuple(value, n, name):
    """Transforms a single int or iterable of ints into an int tuple.

    # Arguments
        value: The value to validate and convert. Could be an int, or any
            iterable of ints.
        n: The size of the tuple to be returned.
        name: The name of the argument being validated, e.g. `strides` or
            `kernel_size`. This is only used to format error messages.

    # Returns
        A tuple of n integers.

    # Raises
        ValueError: If something else than an int/long or iterable thereof was
            passed.
    """
    if isinstance(value, int):
        return (value,) * n

    base_msg = ('The `' + name + '` argument must be a tuple of ' +
                str(n) + ' integers. Received: ' + str(value))
    try:
        value_tuple = tuple(value)
    except TypeError:
        raise ValueError(base_msg)
    if len(value_tuple) != n:
        raise ValueError(base_msg)
    for single_value in value_tuple:
        try:
            int(single_value)
        except (ValueError, TypeError):
            # `int(None)` and friends raise TypeError rather than ValueError;
            # both mean the element is not an integer.
            raise ValueError(base_msg + ' '
                             'including element ' + str(single_value) + ' of '
                             'type ' + str(type(single_value)))
    return value_tuple


def normalize_padding(value):
    """Lower-cases and validates a `padding` argument.

    # Arguments
        value: String, the padding mode.

    # Returns
        The lower-cased padding mode.

    # Raises
        ValueError: if the mode is not `"valid"`, `"same"` or `"causal"`
            (`"full"` is accepted as well when the backend is Theano).
    """
    padding = value.lower()
    allowed = {'valid', 'same', 'causal'}
    if K.backend() == 'theano':
        allowed.add('full')
    if padding not in allowed:
        raise ValueError('The `padding` argument must be one of "valid", "same" '
                         '(or "causal" for Conv1D). Received: ' + str(padding))
    return padding


def convert_kernel(kernel):
    """Converts a Numpy kernel matrix from Theano format to TensorFlow format.

    Also works reciprocally, since the transformation is its own inverse.

    # Arguments
        kernel: Numpy array (3D, 4D or 5D).

    # Returns
        The converted kernel.

    # Raises
        ValueError: in case of invalid kernel shape or invalid data_format.
    """
    kernel = np.asarray(kernel)
    if not 3 <= kernel.ndim <= 5:
        raise ValueError('Invalid kernel shape: ' + str(kernel.shape))
    # Flip every axis except the last two.
    slices = [slice(None, None, -1) for _ in range(kernel.ndim)]
    no_flip = (slice(None, None), slice(None, None))
    slices[-2:] = no_flip
    # NumPy requires a tuple for multidimensional indexing; a list of slices
    # is no longer interpreted that way.
    return np.copy(kernel[tuple(slices)])


def conv_output_length(input_length, filter_size,
                       padding, stride, dilation=1):
    """Determines output length of a convolution given input length.

    # Arguments
        input_length: integer.
        filter_size: integer.
        padding: one of `"same"`, `"valid"`, `"full"`, `"causal"`.
        stride: integer.
        dilation: dilation rate, integer.

    # Returns
        The output length (integer), or `None` if `input_length` is `None`.
    """
    if input_length is None:
        return None
    assert padding in {'same', 'valid', 'full', 'causal'}
    dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
    if padding == 'same':
        output_length = input_length
    elif padding == 'valid':
        output_length = input_length - dilated_filter_size + 1
    elif padding == 'causal':
        output_length = input_length
    elif padding == 'full':
        output_length = input_length + dilated_filter_size - 1
    # Ceiling division by the stride.
    return (output_length + stride - 1) // stride


def conv_input_length(output_length, filter_size, padding, stride):
    """Determines input length of a convolution given output length.

    # Arguments
        output_length: integer.
        filter_size: integer.
        padding: one of `"same"`, `"valid"`, `"full"`.
        stride: integer.

    # Returns
        The input length (integer), or `None` if `output_length` is `None`.
    """
    if output_length is None:
        return None
    assert padding in {'same', 'valid', 'full'}
    if padding == 'same':
        pad = filter_size // 2
    elif padding == 'valid':
        pad = 0
    elif padding == 'full':
        pad = filter_size - 1
    return (output_length - 1) * stride - 2 * pad + filter_size


def deconv_length(dim_size, stride_size, kernel_size, padding,
                  output_padding, dilation=1):
    """Determines output length of a transposed convolution given input length.

    # Arguments
        dim_size: Integer, the input length.
        stride_size: Integer, the stride along the dimension of `dim_size`.
        kernel_size: Integer, the kernel size along the dimension of
            `dim_size`.
        padding: One of `"same"`, `"valid"`, `"full"`.
        output_padding: Integer, amount of padding along the output dimension,
            Can be set to `None` in which case the output length is inferred.
        dilation: dilation rate, integer.

    # Returns
        The output length (integer), or `None` if `dim_size` is `None`.
    """
    assert padding in {'same', 'valid', 'full'}
    if dim_size is None:
        return None

    # Get the dilated kernel size
    kernel_size = kernel_size + (kernel_size - 1) * (dilation - 1)

    # Infer length if output padding is None, else compute the exact length
    if output_padding is None:
        if padding == 'valid':
            dim_size = dim_size * stride_size + max(kernel_size - stride_size, 0)
        elif padding == 'full':
            dim_size = dim_size * stride_size - (stride_size + kernel_size - 2)
        elif padding == 'same':
            dim_size = dim_size * stride_size
    else:
        if padding == 'same':
            pad = kernel_size // 2
        elif padding == 'valid':
            pad = 0
        elif padding == 'full':
            pad = kernel_size - 1

        dim_size = ((dim_size - 1) * stride_size + kernel_size - 2 * pad +
                    output_padding)

    return dim_size
reporthook: a hook function that will be called once on establishment of the network connection and once after each block read thereafter. The hook will be passed three arguments; a count of blocks transferred so far, a block size in bytes, and the total size of the file. data: `data` argument passed to `urlopen`. """ def chunk_read(response, chunk_size=8192, reporthook=None): content_type = response.info().get('Content-Length') total_size = -1 if content_type is not None: total_size = int(content_type.strip()) count = 0 while True: chunk = response.read(chunk_size) count += 1 if reporthook is not None: reporthook(count, chunk_size, total_size) if chunk: yield chunk else: break with closing(urlopen(url, data)) as response, open(filename, 'wb') as fd: for chunk in chunk_read(response, reporthook=reporthook): fd.write(chunk) else: from six.moves.urllib.request import urlretrieve def _extract_archive(file_path, path='.', archive_format='auto'): """Extracts an archive if it matches tar, tar.gz, tar.bz, or zip formats. # Arguments file_path: path to the archive file path: path to extract the archive file archive_format: Archive format to try for extracting the file. Options are 'auto', 'tar', 'zip', and None. 'tar' includes tar, tar.gz, and tar.bz files. The default 'auto' is ['tar', 'zip']. None or an empty list will return no matches found. # Returns True if a match was found and an archive extraction was completed, False otherwise. 
""" if archive_format is None: return False if archive_format is 'auto': archive_format = ['tar', 'zip'] if isinstance(archive_format, six.string_types): archive_format = [archive_format] for archive_type in archive_format: if archive_type is 'tar': open_fn = tarfile.open is_match_fn = tarfile.is_tarfile if archive_type is 'zip': open_fn = zipfile.ZipFile is_match_fn = zipfile.is_zipfile if is_match_fn(file_path): with open_fn(file_path) as archive: try: archive.extractall(path) except (tarfile.TarError, RuntimeError, KeyboardInterrupt): if os.path.exists(path): if os.path.isfile(path): os.remove(path) else: shutil.rmtree(path) raise return True return False def get_file(fname, origin, untar=False, md5_hash=None, file_hash=None, cache_subdir='datasets', hash_algorithm='auto', extract=False, archive_format='auto', cache_dir=None): """Downloads a file from a URL if it not already in the cache. By default the file at the url `origin` is downloaded to the cache_dir `~/.keras`, placed in the cache_subdir `datasets`, and given the filename `fname`. The final location of a file `example.txt` would therefore be `~/.keras/datasets/example.txt`. Files in tar, tar.gz, tar.bz, and zip formats can also be extracted. Passing a hash will verify the file after download. The command line programs `shasum` and `sha256sum` can compute the hash. # Arguments fname: Name of the file. If an absolute path `/path/to/file.txt` is specified the file will be saved at that location. origin: Original URL of the file. untar: Deprecated in favor of 'extract'. boolean, whether the file should be decompressed md5_hash: Deprecated in favor of 'file_hash'. md5 hash of the file for verification file_hash: The expected hash string of the file after download. The sha256 and md5 hash algorithms are both supported. cache_subdir: Subdirectory under the Keras cache dir where the file is saved. If an absolute path `/path/to/folder` is specified the file will be saved at that location. 
hash_algorithm: Select the hash algorithm to verify the file. options are 'md5', 'sha256', and 'auto'. The default 'auto' detects the hash algorithm in use. extract: True tries extracting the file as an Archive, like tar or zip. archive_format: Archive format to try for extracting the file. Options are 'auto', 'tar', 'zip', and None. 'tar' includes tar, tar.gz, and tar.bz files. The default 'auto' is ['tar', 'zip']. None or an empty list will return no matches found. cache_dir: Location to store cached files, when None it defaults to the [Keras Directory](/faq/#where-is-the-keras-configuration-filed-stored). # Returns Path to the downloaded file """ # noqa if cache_dir is None: cache_dir = os.path.join(os.path.expanduser('~'), '.keras') if md5_hash is not None and file_hash is None: file_hash = md5_hash hash_algorithm = 'md5' datadir_base = os.path.expanduser(cache_dir) if not os.access(datadir_base, os.W_OK): datadir_base = os.path.join('/tmp', '.keras') datadir = os.path.join(datadir_base, cache_subdir) if not os.path.exists(datadir): os.makedirs(datadir) if untar: untar_fpath = os.path.join(datadir, fname) fpath = untar_fpath + '.tar.gz' else: fpath = os.path.join(datadir, fname) download = False if os.path.exists(fpath): # File found; verify integrity if a hash was provided. if file_hash is not None: if not validate_file(fpath, file_hash, algorithm=hash_algorithm): print('A local file was found, but it seems to be ' 'incomplete or outdated because the ' + hash_algorithm + ' file hash does not match the original value of ' + file_hash + ' so we will re-download the data.') download = True else: download = True if download: print('Downloading data from', origin) class ProgressTracker(object): # Maintain progbar for the lifetime of download. # This design was chosen for Python 2.7 compatibility. 
progbar = None def dl_progress(count, block_size, total_size): if ProgressTracker.progbar is None: if total_size is -1: total_size = None ProgressTracker.progbar = Progbar(total_size) else: ProgressTracker.progbar.update(count * block_size) error_msg = 'URL fetch failure on {}: {} -- {}' try: try: urlretrieve(origin, fpath, dl_progress) except HTTPError as e: raise Exception(error_msg.format(origin, e.code, e.msg)) except URLError as e: raise Exception(error_msg.format(origin, e.errno, e.reason)) except (Exception, KeyboardInterrupt): if os.path.exists(fpath): os.remove(fpath) raise ProgressTracker.progbar = None if untar: if not os.path.exists(untar_fpath): _extract_archive(fpath, datadir, archive_format='tar') return untar_fpath if extract: _extract_archive(fpath, datadir, archive_format) return fpath def _hash_file(fpath, algorithm='sha256', chunk_size=65535): """Calculates a file sha256 or md5 hash. # Example ```python >>> from keras.data_utils import _hash_file >>> _hash_file('/path/to/file.zip') 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' ``` # Arguments fpath: path to the file being validated algorithm: hash algorithm, one of 'auto', 'sha256', or 'md5'. The default 'auto' detects the hash algorithm in use. chunk_size: Bytes to read at a time, important for large files. # Returns The file hash """ if (algorithm is 'sha256') or (algorithm is 'auto' and len(hash) is 64): hasher = hashlib.sha256() else: hasher = hashlib.md5() with open(fpath, 'rb') as fpath_file: for chunk in iter(lambda: fpath_file.read(chunk_size), b''): hasher.update(chunk) return hasher.hexdigest() def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535): """Validates a file against a sha256 or md5 hash. # Arguments fpath: path to the file being validated file_hash: The expected hash string of the file. The sha256 and md5 hash algorithms are both supported. algorithm: Hash algorithm, one of 'auto', 'sha256', or 'md5'. 
The default 'auto' detects the hash algorithm in use. chunk_size: Bytes to read at a time, important for large files. # Returns Whether the file is valid """ if ((algorithm is 'sha256') or (algorithm is 'auto' and len(file_hash) is 64)): hasher = 'sha256' else: hasher = 'md5' if str(_hash_file(fpath, hasher, chunk_size)) == str(file_hash): return True else: return False class Sequence(object): """Base object for fitting to a sequence of data, such as a dataset. Every `Sequence` must implement the `__getitem__` and the `__len__` methods. If you want to modify your dataset between epochs you may implement `on_epoch_end`. The method `__getitem__` should return a complete batch. # Notes `Sequence` are a safer way to do multiprocessing. This structure guarantees that the network will only train once on each sample per epoch which is not the case with generators. # Examples ```python from skimage.io import imread from skimage.transform import resize import numpy as np # Here, `x_set` is list of path to the images # and `y_set` are the associated classes. class CIFAR10Sequence(Sequence): def __init__(self, x_set, y_set, batch_size): self.x, self.y = x_set, y_set self.batch_size = batch_size def __len__(self): return int(np.ceil(len(self.x) / float(self.batch_size))) def __getitem__(self, idx): batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size] batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size] return np.array([ resize(imread(file_name), (200, 200)) for file_name in batch_x]), np.array(batch_y) ``` """ @abstractmethod def __getitem__(self, index): """Gets batch at position `index`. # Arguments index: position of the batch in the Sequence. # Returns A batch """ raise NotImplementedError @abstractmethod def __len__(self): """Number of batch in the Sequence. # Returns The number of batches in the Sequence. """ raise NotImplementedError def on_epoch_end(self): """Method called at the end of every epoch. 
""" pass def __iter__(self): """Create a generator that iterate over the Sequence.""" for item in (self[i] for i in range(len(self))): yield item # Global variables to be shared across processes _SHARED_SEQUENCES = {} # We use a Value to provide unique id to different processes. _SEQUENCE_COUNTER = None def init_pool(seqs): global _SHARED_SEQUENCES _SHARED_SEQUENCES = seqs def get_index(uid, i): """Get the value from the Sequence `uid` at index `i`. To allow multiple Sequences to be used at the same time, we use `uid` to get a specific one. A single Sequence would cause the validation to overwrite the training Sequence. # Arguments uid: int, Sequence identifier i: index # Returns The value at index `i`. """ return _SHARED_SEQUENCES[uid][i] class SequenceEnqueuer(object): """Base class to enqueue inputs. The task of an Enqueuer is to use parallelism to speed up preprocessing. This is done with processes or threads. # Examples ```python enqueuer = SequenceEnqueuer(...) enqueuer.start() datas = enqueuer.get() for data in datas: # Use the inputs; training, evaluating, predicting. # ... stop sometime. enqueuer.close() ``` The `enqueuer.get()` should be an infinite stream of datas. """ def __init__(self, sequence, use_multiprocessing=False): self.sequence = sequence self.use_multiprocessing = use_multiprocessing global _SEQUENCE_COUNTER if _SEQUENCE_COUNTER is None: try: _SEQUENCE_COUNTER = mp.Value('i', 0) except OSError: # In this case the OS does not allow us to use # multiprocessing. We resort to an int # for enqueuer indexing. _SEQUENCE_COUNTER = 0 if isinstance(_SEQUENCE_COUNTER, int): self.uid = _SEQUENCE_COUNTER _SEQUENCE_COUNTER += 1 else: # Doing Multiprocessing.Value += x is not process-safe. 
with _SEQUENCE_COUNTER.get_lock(): self.uid = _SEQUENCE_COUNTER.value _SEQUENCE_COUNTER.value += 1 self.workers = 0 self.executor_fn = None self.queue = None self.run_thread = None self.stop_signal = None def is_running(self): return self.stop_signal is not None and not self.stop_signal.is_set() def start(self, workers=1, max_queue_size=10): """Start the handler's workers. # Arguments workers: number of worker threads max_queue_size: queue size (when full, workers could block on `put()`) """ if self.use_multiprocessing: self.executor_fn = self._get_executor_init(workers) else: # We do not need the init since it's threads. self.executor_fn = lambda _: ThreadPool(workers) self.workers = workers self.queue = queue.Queue(max_queue_size) self.stop_signal = threading.Event() self.run_thread = threading.Thread(target=self._run) self.run_thread.daemon = True self.run_thread.start() def _send_sequence(self): """Send current Iterable to all workers.""" # For new processes that may spawn _SHARED_SEQUENCES[self.uid] = self.sequence def stop(self, timeout=None): """Stops running threads and wait for them to exit, if necessary. Should be called by the same thread which called `start()`. # Arguments timeout: maximum time to wait on `thread.join()` """ self.stop_signal.set() with self.queue.mutex: self.queue.queue.clear() self.queue.unfinished_tasks = 0 self.queue.not_full.notify() self.run_thread.join(timeout) _SHARED_SEQUENCES[self.uid] = None @abstractmethod def _run(self): """Submits request to the executor and queue the `Future` objects.""" raise NotImplementedError @abstractmethod def _get_executor_init(self, workers): """Get the Pool initializer for multiprocessing. # Returns Function, a Function to initialize the pool """ raise NotImplementedError @abstractmethod def get(self): """Creates a generator to extract data from the queue. Skip the data if it is `None`. # Returns Generator yielding tuples `(inputs, targets)` or `(inputs, targets, sample_weights)`. 
""" raise NotImplementedError class OrderedEnqueuer(SequenceEnqueuer): """Builds a Enqueuer from a Sequence. Used in `fit_generator`, `evaluate_generator`, `predict_generator`. # Arguments sequence: A `keras.utils.data_utils.Sequence` object. use_multiprocessing: use multiprocessing if True, otherwise threading shuffle: whether to shuffle the data at the beginning of each epoch """ def __init__(self, sequence, use_multiprocessing=False, shuffle=False): super(OrderedEnqueuer, self).__init__(sequence, use_multiprocessing) self.shuffle = shuffle def _get_executor_init(self, workers): """Get the Pool initializer for multiprocessing. # Returns Function, a Function to initialize the pool """ return lambda seqs: mp.Pool(workers, initializer=init_pool, initargs=(seqs,)) def _wait_queue(self): """Wait for the queue to be empty.""" while True: time.sleep(0.1) if self.queue.unfinished_tasks == 0 or self.stop_signal.is_set(): return def _run(self): """Submits request to the executor and queue the `Future` objects.""" sequence = list(range(len(self.sequence))) self._send_sequence() # Share the initial sequence while True: if self.shuffle: random.shuffle(sequence) with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: for i in sequence: if self.stop_signal.is_set(): return self.queue.put( executor.apply_async(get_index, (self.uid, i)), block=True) # Done with the current epoch, waiting for the final batches self._wait_queue() if self.stop_signal.is_set(): # We're done return # Call the internal on epoch end. self.sequence.on_epoch_end() self._send_sequence() # Update the pool def get(self): """Creates a generator to extract data from the queue. Skip the data if it is `None`. # Yields The next element in the queue, i.e. a tuple `(inputs, targets)` or `(inputs, targets, sample_weights)`. 
""" try: while self.is_running(): inputs = self.queue.get(block=True).get() self.queue.task_done() if inputs is not None: yield inputs except Exception as e: self.stop() six.reraise(*sys.exc_info()) def init_pool_generator(gens, random_seed=None): global _SHARED_SEQUENCES _SHARED_SEQUENCES = gens if random_seed is not None: ident = mp.current_process().ident np.random.seed(random_seed + ident) def next_sample(uid): """Get the next value from the generator `uid`. To allow multiple generators to be used at the same time, we use `uid` to get a specific one. A single generator would cause the validation to overwrite the training generator. # Arguments uid: int, generator identifier # Returns The next value of generator `uid`. """ return six.next(_SHARED_SEQUENCES[uid]) class GeneratorEnqueuer(SequenceEnqueuer): """Builds a queue out of a data generator. The provided generator can be finite in which case the class will throw a `StopIteration` exception. Used in `fit_generator`, `evaluate_generator`, `predict_generator`. # Arguments generator: a generator function which yields data use_multiprocessing: use multiprocessing if True, otherwise threading wait_time: time to sleep in-between calls to `put()` random_seed: Initial seed for workers, will be incremented by one for each worker. """ def __init__(self, sequence, use_multiprocessing=False, wait_time=None, random_seed=None): super(GeneratorEnqueuer, self).__init__(sequence, use_multiprocessing) self.random_seed = random_seed if wait_time is not None: warnings.warn('`wait_time` is not used anymore.', DeprecationWarning) def _get_executor_init(self, workers): """Get the Pool initializer for multiprocessing. 
def get(self):
    """Creates a generator to extract data from the queue.

    Skip the data if it is `None`.

    # Yields
        The next element in the queue, i.e. a tuple
        `(inputs, targets)` or
        `(inputs, targets, sample_weights)`.

    # Raises
        RuntimeError: if a worker failed with a
            'generator already executing' error, i.e. the user
            generator is shared between threads but is not thread-safe.
        Exception: any other worker exception is re-raised unchanged
            after the enqueuer has been stopped.
    """
    try:
        while self.is_running():
            inputs = self.queue.get(block=True).get()
            self.queue.task_done()
            if inputs is not None:
                yield inputs
    except StopIteration:
        # Special case for finite generators: one worker exhausted the
        # generator, but results queued before/after it may still be valid.
        last_ones = []
        while self.queue.qsize() > 0:
            last_ones.append(self.queue.get(block=True))
        # Wait for them to complete
        for future in last_ones:
            future.wait()
        # Keep the good ones
        last_ones = [future.get() for future in last_ones
                     if future.successful()]
        for inputs in last_ones:
            if inputs is not None:
                yield inputs
    except Exception as e:
        self.stop()
        if 'generator already executing' in str(e):
            # Each fragment ends with a space so that the sentences do
            # not run into each other once concatenated.
            raise RuntimeError(
                "Your generator is NOT thread-safe. "
                "Keras requires a thread-safe generator when "
                "`use_multiprocessing=False, workers > 1`. "
                "For more information see issue #1638.")
        six.reraise(*sys.exc_info())
""" refs = defaultdict(int) def __init__(self, datapath, dataset, start=0, end=None, normalizer=None): if h5py is None: raise ImportError('The use of HDF5Matrix requires ' 'HDF5 and h5py installed.') if datapath not in list(self.refs.keys()): f = h5py.File(datapath) self.refs[datapath] = f else: f = self.refs[datapath] self.data = f[dataset] self.start = start if end is None: self.end = self.data.shape[0] else: self.end = end self.normalizer = normalizer if self.normalizer is not None: first_val = self.normalizer(self.data[0:1]) else: first_val = self.data[0:1] self._base_shape = first_val.shape[1:] self._base_dtype = first_val.dtype def __len__(self): return self.end - self.start def __getitem__(self, key): if isinstance(key, slice): start, stop = key.start, key.stop if start is None: start = 0 if stop is None: stop = self.shape[0] if stop + self.start <= self.end: idx = slice(start + self.start, stop + self.start) else: raise IndexError elif isinstance(key, (int, np.integer)): if key + self.start < self.end: idx = key + self.start else: raise IndexError elif isinstance(key, np.ndarray): if np.max(key) + self.start < self.end: idx = (self.start + key).tolist() else: raise IndexError else: # Assume list/iterable if max(key) + self.start < self.end: idx = [x + self.start for x in key] else: raise IndexError if self.normalizer is not None: return self.normalizer(self.data[idx]) else: return self.data[idx] @property def shape(self): """Gets a numpy-style shape tuple giving the dataset dimensions. # Returns A numpy-style shape tuple. """ return (self.end - self.start,) + self._base_shape @property def dtype(self): """Gets the datatype of the dataset. # Returns A numpy dtype string. """ return self._base_dtype @property def ndim(self): """Gets the number of dimensions (rank) of the dataset. # Returns An integer denoting the number of dimensions (rank) of the dataset. """ return self.data.ndim @property def size(self): """Gets the total dataset size (number of elements). 
def ask_to_proceed_with_overwrite(filepath):
    """Interactively asks whether an existing file may be overwritten.

    The question is repeated until the answer (stripped and lower-cased)
    is either `y` or `n`.

    # Arguments
        filepath: the path to the file to be overwritten.

    # Returns
        True if we can proceed with overwrite, False otherwise.
    """
    question = ('[WARNING] %s already exists - overwrite? '
                '[y/n]' % (filepath))
    answer = six.moves.input(question).strip().lower()
    while answer not in ('y', 'n'):
        retry = ('Enter "y" (overwrite) or "n" '
                 '(cancel).')
        answer = six.moves.input(retry).strip().lower()

    if answer == 'y':
        print('[TIP] Next time specify overwrite=True!')
        return True
    return False
def __setitem__(self, attr, val):
    """Stores `val` under `attr` in the wrapped dict or HDF5 group.

    Dict-backed storage: Numpy values are pickled and flagged with a
    `_<attr>_pickled` entry, anything else is stored as is.

    HDF5-backed storage: Numpy values become datasets, lists become
    attributes (split into numbered chunks `<attr>0`, `<attr>1`, ...
    when a single attribute would exceed `HDF5_OBJECT_HEADER_LIMIT`),
    and any other value is stored as a plain attribute.

    # Arguments
        attr: name of the entry (bytes names are decoded as UTF-8
            for dict-backed storage).
        val: value to store.

    # Raises
        ValueError: if the instance was opened in read only mode.
        KeyError: if an HDF5 group member named `attr` already exists.
        RuntimeError: if a single list item is larger than
            `HDF5_OBJECT_HEADER_LIMIT`.
    """
    if self.read_only:
        raise ValueError('Cannot set item in read only mode.')
    is_np = type(val).__module__ == np.__name__

    # Dict-backed storage.
    if isinstance(self.data, dict):
        key = attr.decode('utf-8') if isinstance(attr, bytes) else attr
        if not is_np:
            self.data[key] = val
            return
        self.data[key] = pickle.dumps(val)
        # We have to remember to unpickle in __getitem__
        self.data['_{}_pickled'.format(key)] = True
        return

    # HDF5-backed storage.
    if isinstance(self.data, h5py.Group) and attr in self.data:
        raise KeyError('Cannot set attribute. '
                       'Group with name "{}" exists.'.format(attr))

    if is_np:
        dataset = self.data.create_dataset(attr, val.shape, dtype=val.dtype)
        if val.shape:
            dataset[:] = val
        else:
            # scalar
            dataset[()] = val
        return

    if not isinstance(val, list):
        self.data.attrs[attr] = val
        return

    # Check that no item in `data` is larger than `HDF5_OBJECT_HEADER_LIMIT`
    # because in that case even chunking the array would not make the saving
    # possible.
    oversized = [item for item in val
                 if len(item) > HDF5_OBJECT_HEADER_LIMIT]
    # Expecting this to never be true.
    if len(oversized) > 0:
        raise RuntimeError('The following attributes cannot be saved to '
                           'HDF5 file because they are larger than '
                           '%d bytes: %s' % (HDF5_OBJECT_HEADER_LIMIT,
                                             ', '.join(oversized)))

    if val and sys.version_info[0] == 3 and isinstance(val[0], str):
        # convert to bytes
        val = [item.encode('utf-8') for item in val]

    data_npy = np.asarray(val)
    num_chunks = 1
    chunked_data = np.array_split(data_npy, num_chunks)
    # This will never loop forever thanks to the test above.
    while any(chunk.nbytes > HDF5_OBJECT_HEADER_LIMIT
              for chunk in chunked_data):
        num_chunks += 1
        chunked_data = np.array_split(data_npy, num_chunks)

    if num_chunks == 1:
        self.data.attrs[attr] = val
    else:
        for chunk_id, chunk_data in enumerate(chunked_data):
            self.data.attrs['%s%d' % (attr, chunk_id)] = chunk_data
def update(self, *args):
    """Updates the wrapped data in place, like `dict.update`.

    Only dict-backed instances are supported.

    # Arguments
        *args: positional arguments forwarded to `dict.update`.

    # Raises
        NotImplementedError: if the wrapped data is not a dict.
    """
    if isinstance(self.data, dict):
        self.data.update(*args)
        # Return here: falling through would raise NotImplementedError
        # even though the update has just succeeded.
        return
    raise NotImplementedError
gpus: Integer >= 2 or list of integers, number of GPUs or list of GPU IDs on which to create model replicas. cpu_merge: A boolean value to identify whether to force merging model weights under the scope of the CPU or not. cpu_relocation: A boolean value to identify whether to create the model's weights under the scope of the CPU. If the model is not defined under any preceding device scope, you can still rescue it by activating this option. # Returns A Keras `Model` instance which can be used just like the initial `model` argument, but which distributes its workload on multiple GPUs. # Example 1 - Training models with weights merge on CPU ```python import tensorflow as tf from keras.applications import Xception from keras.utils import multi_gpu_model import numpy as np num_samples = 1000 height = 224 width = 224 num_classes = 1000 # Instantiate the base model (or "template" model). # We recommend doing this with under a CPU device scope, # so that the model's weights are hosted on CPU memory. # Otherwise they may end up hosted on a GPU, which would # complicate weight sharing. with tf.device('/cpu:0'): model = Xception(weights=None, input_shape=(height, width, 3), classes=num_classes) # Replicates the model on 8 GPUs. # This assumes that your machine has 8 available GPUs. parallel_model = multi_gpu_model(model, gpus=8) parallel_model.compile(loss='categorical_crossentropy', optimizer='rmsprop') # Generate dummy data. x = np.random.random((num_samples, height, width, 3)) y = np.random.random((num_samples, num_classes)) # This `fit` call will be distributed on 8 GPUs. # Since the batch size is 256, each GPU will process 32 samples. parallel_model.fit(x, y, epochs=20, batch_size=256) # Save model via the template model (which shares the same weights): model.save('my_model.h5') ``` # Example 2 - Training models with weights merge on CPU using cpu_relocation ```python .. # Not needed to change the device scope for model definition: model = Xception(weights=None, ..) 
try: model = multi_gpu_model(model, cpu_relocation=True) print("Training using multiple GPUs..") except: print("Training using single GPU or CPU..") model.compile(..) .. ``` # Example 3 - Training models with weights merge on GPU (recommended for NV-link) ```python .. # Not needed to change the device scope for model definition: model = Xception(weights=None, ..) try: model = multi_gpu_model(model, cpu_merge=False) print("Training using multiple GPUs..") except: print("Training using single GPU or CPU..") model.compile(..) .. ``` # On model saving To save the multi-gpu model, use `.save(fname)` or `.save_weights(fname)` with the template model (the argument you passed to `multi_gpu_model`), rather than the model returned by `multi_gpu_model`. """ if K.backend() != 'tensorflow': raise ValueError('`multi_gpu_model` is only available ' 'with the TensorFlow backend.') available_devices = _get_available_devices() available_devices = [_normalize_device_name(name) for name in available_devices] if not gpus: # Using all visible GPUs when not specifying `gpus` # e.g. CUDA_VISIBLE_DEVICES=0,2 python keras_mgpu.py gpus = len([x for x in available_devices if 'gpu' in x]) if isinstance(gpus, (list, tuple)): if len(gpus) <= 1: raise ValueError('For multi-gpu usage to be effective, ' 'call `multi_gpu_model` with `len(gpus) >= 2`. ' 'Received: `gpus=%s`' % gpus) num_gpus = len(gpus) target_gpu_ids = gpus else: if gpus <= 1: raise ValueError('For multi-gpu usage to be effective, ' 'call `multi_gpu_model` with `gpus >= 2`. ' 'Received: `gpus=%d`' % gpus) num_gpus = gpus target_gpu_ids = range(num_gpus) import tensorflow as tf target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids] for device in target_devices: if device not in available_devices: raise ValueError( 'To call `multi_gpu_model` with `gpus=%s`, ' 'we expect the following devices to be available: %s. ' 'However this machine only has: %s. ' 'Try reducing `gpus`.' 
% (gpus, target_devices, available_devices)) def get_slice(data, i, parts): shape = K.shape(data) batch_size = shape[:1] input_shape = shape[1:] step = batch_size // parts if i == parts - 1: size = batch_size - step * i else: size = step size = K.concatenate([size, input_shape], axis=0) stride = K.concatenate([step, input_shape * 0], axis=0) start = stride * i return K.slice(data, start, size) # Relocate the model definition under CPU device scope if needed if cpu_relocation: with tf.device('/cpu:0'): model = clone_model(model) all_outputs = [] for i in range(len(model.outputs)): all_outputs.append([]) # Place a copy of the model on each GPU, # each getting a slice of the inputs. for i, gpu_id in enumerate(target_gpu_ids): with tf.device('/gpu:%d' % gpu_id): with tf.name_scope('replica_%d' % gpu_id): inputs = [] # Retrieve a slice of the input. for x in model.inputs: # In-place input splitting which is not only # 5% ~ 12% faster but also less GPU memory # duplication. with tf.device(x.device): input_shape = K.int_shape(x)[1:] slice_i = Lambda(get_slice, output_shape=input_shape, arguments={'i': i, 'parts': num_gpus})(x) inputs.append(slice_i) # Apply model on slice # (creating a model replica on the target device). outputs = model(inputs) outputs = to_list(outputs) # Save the outputs for merging back together later. for o in range(len(outputs)): all_outputs[o].append(outputs[o]) # Deduplicate output names to handle Siamese networks. occurrences = {} for n in model.output_names: if n not in occurrences: occurrences[n] = 1 else: occurrences[n] += 1 conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1} output_names = [] for n in model.output_names: if n in conflict_counter: conflict_counter[n] += 1 n += '_%d' % conflict_counter[n] output_names.append(n) # Merge outputs under expected scope. 
def _check_pydot():
    """Raise errors if `pydot` or GraphViz unavailable.

    # Raises
        ImportError: if the module-level `pydot` is None, i.e. the
            optional import failed.
        OSError: if rendering a blank graph with `pydot` raises `OSError`.
    """
    if pydot is None:
        raise ImportError(
            'Failed to import `pydot`. '
            'Please install `pydot`. '
            'For example with `pip install pydot`.')
    try:
        # Attempt to create an image of a blank graph
        # to check the pydot/graphviz installation.
        pydot.Dot.create(pydot.Dot())
    except OSError:
        # Note the trailing space after the first sentence: without it
        # the message reads "...GraphViz.Please install...".
        raise OSError(
            '`pydot` failed to call GraphViz. '
            'Please install GraphViz (https://www.graphviz.org/) '
            'and ensure that its executables are in the $PATH.')
for layer in layers: layer_id = str(id(layer)) # Append a wrapped layer's label to node's label, if it exists. layer_name = layer.name class_name = layer.__class__.__name__ if isinstance(layer, Wrapper): layer_name = '{}({})'.format(layer_name, layer.layer.name) child_class_name = layer.layer.__class__.__name__ class_name = '{}({})'.format(class_name, child_class_name) # Create node's label. if show_layer_names: label = '{}: {}'.format(layer_name, class_name) else: label = class_name # Rebuild the label as a table including input/output shapes. if show_shapes: try: outputlabels = str(layer.output_shape) except AttributeError: outputlabels = 'multiple' if hasattr(layer, 'input_shape'): inputlabels = str(layer.input_shape) elif hasattr(layer, 'input_shapes'): inputlabels = ', '.join( [str(ishape) for ishape in layer.input_shapes]) else: inputlabels = 'multiple' label = '%s\n|{input:|output:}|{{%s}|{%s}}' % (label, inputlabels, outputlabels) node = pydot.Node(layer_id, label=label) dot.add_node(node) # Connect nodes with edges. for layer in layers: layer_id = str(id(layer)) for i, node in enumerate(layer._inbound_nodes): node_key = layer.name + '_ib-' + str(i) if node_key in model._network_nodes: for inbound_layer in node.inbound_layers: inbound_layer_id = str(id(inbound_layer)) dot.add_edge(pydot.Edge(inbound_layer_id, layer_id)) return dot def plot_model(model, to_file='model.png', show_shapes=False, show_layer_names=True, rankdir='TB'): """Converts a Keras model to dot format and save to a file. # Arguments model: A Keras model instance to_file: File name of the plot image. show_shapes: whether to display shape information. show_layer_names: whether to display layer names. rankdir: `rankdir` argument passed to PyDot, a string specifying the format of the plot: 'TB' creates a vertical plot; 'LR' creates a horizontal plot. 
""" dot = model_to_dot(model, show_shapes, show_layer_names, rankdir) _, extension = os.path.splitext(to_file) if not extension: extension = 'png' else: extension = extension[1:] dot.write(to_file, format=extension) Keras-2.2.4/keras/utils/np_utils.py0000644000000000116100000000313113342055016016772 0ustar rooteng00000000000000"""Numpy-related utilities.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np def to_categorical(y, num_classes=None, dtype='float32'): """Converts a class vector (integers) to binary class matrix. E.g. for use with categorical_crossentropy. # Arguments y: class vector to be converted into a matrix (integers from 0 to num_classes). num_classes: total number of classes. dtype: The data type expected by the input, as a string (`float32`, `float64`, `int32`...) # Returns A binary matrix representation of the input. The classes axis is placed last. """ y = np.array(y, dtype='int') input_shape = y.shape if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: input_shape = tuple(input_shape[:-1]) y = y.ravel() if not num_classes: num_classes = np.max(y) + 1 n = y.shape[0] categorical = np.zeros((n, num_classes), dtype=dtype) categorical[np.arange(n), y] = 1 output_shape = input_shape + (num_classes,) categorical = np.reshape(categorical, output_shape) return categorical def normalize(x, axis=-1, order=2): """Normalizes a Numpy array. # Arguments x: Numpy array to normalize. axis: axis along which to normalize. order: Normalization order (e.g. 2 for L2 norm). # Returns A normalized copy of the array. 
""" l2 = np.atleast_1d(np.linalg.norm(x, order, axis)) l2[l2 == 0] = 1 return x / np.expand_dims(l2, axis) Keras-2.2.4/keras/backend/0000755000000000116100000000000013355226624015025 5ustar rooteng00000000000000Keras-2.2.4/keras/backend/__init__.py0000644000000000116100000000756113326715636017153 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import print_function import os import json import sys import importlib from .common import epsilon from .common import floatx from .common import set_epsilon from .common import set_floatx from .common import cast_to_floatx from .common import image_data_format from .common import set_image_data_format from .common import normalize_data_format # Set Keras base dir path given KERAS_HOME env variable, if applicable. # Otherwise either ~/.keras or /tmp. if 'KERAS_HOME' in os.environ: _keras_dir = os.environ.get('KERAS_HOME') else: _keras_base_dir = os.path.expanduser('~') if not os.access(_keras_base_dir, os.W_OK): _keras_base_dir = '/tmp' _keras_dir = os.path.join(_keras_base_dir, '.keras') # Default backend: TensorFlow. _BACKEND = 'tensorflow' # Attempt to read Keras config file. _config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json')) if os.path.exists(_config_path): try: with open(_config_path) as f: _config = json.load(f) except ValueError: _config = {} _floatx = _config.get('floatx', floatx()) assert _floatx in {'float16', 'float32', 'float64'} _epsilon = _config.get('epsilon', epsilon()) assert isinstance(_epsilon, float) _backend = _config.get('backend', _BACKEND) _image_data_format = _config.get('image_data_format', image_data_format()) assert _image_data_format in {'channels_last', 'channels_first'} set_floatx(_floatx) set_epsilon(_epsilon) set_image_data_format(_image_data_format) _BACKEND = _backend # Save config file, if possible. 
if not os.path.exists(_keras_dir): try: os.makedirs(_keras_dir) except OSError: # Except permission denied and potential race conditions # in multi-threaded environments. pass if not os.path.exists(_config_path): _config = { 'floatx': floatx(), 'epsilon': epsilon(), 'backend': _BACKEND, 'image_data_format': image_data_format() } try: with open(_config_path, 'w') as f: f.write(json.dumps(_config, indent=4)) except IOError: # Except permission denied. pass # Set backend based on KERAS_BACKEND flag, if applicable. if 'KERAS_BACKEND' in os.environ: _backend = os.environ['KERAS_BACKEND'] if _backend: _BACKEND = _backend # Import backend functions. if _BACKEND == 'cntk': sys.stderr.write('Using CNTK backend\n') from .cntk_backend import * elif _BACKEND == 'theano': sys.stderr.write('Using Theano backend.\n') from .theano_backend import * elif _BACKEND == 'tensorflow': sys.stderr.write('Using TensorFlow backend.\n') from .tensorflow_backend import * else: # Try and load external backend. try: backend_module = importlib.import_module(_BACKEND) entries = backend_module.__dict__ # Check if valid backend. # Module is a valid backend if it has the required entries. required_entries = ['placeholder', 'variable', 'function'] for e in required_entries: if e not in entries: raise ValueError('Invalid backend. Missing required entry : ' + e) namespace = globals() for k, v in entries.items(): # Make sure we don't override any entries from common, such as epsilon. if k not in namespace: namespace[k] = v sys.stderr.write('Using ' + _BACKEND + ' backend.\n') except ImportError: raise ValueError('Unable to import backend : ' + str(_BACKEND)) def backend(): """Publicly accessible method for determining the current backend. # Returns String, the name of the backend Keras is currently using. 
def set_learning_phase(value):
    """Replaces the module-level learning phase with a fixed value.

    # Arguments
        value: learning phase value, either 0 (test) or 1 (train).

    # Raises
        ValueError: if `value` is neither `0` nor `1`.
    """
    global _LEARNING_PHASE
    if value in {0, 1}:
        _LEARNING_PHASE = value
        return
    raise ValueError('Expected learning phase to be '
                     '0 or 1.')
@contextmanager
def name_scope(name):
    """Context manager that pushes `name` onto `NAME_SCOPE_STACK`.

    The name is appended on entry and popped again on exit, including
    when the managed body raises.

    # Arguments
        name: string, the scope name to push.
    """
    global NAME_SCOPE_STACK
    NAME_SCOPE_STACK.append(name)
    try:
        yield
    finally:
        # Pop in `finally`: otherwise an exception inside the `with` body
        # would leave a stale scope on the stack.
        NAME_SCOPE_STACK.pop()
def constant(value, dtype=None, shape=None, name=None):
    """Creates a constant tensor filled with `value`.

    # Arguments
        value: the value, multiplied with `np.ones(shape)` to build
            the constant's content.
        dtype: the dtype of the constant; defaults to `floatx()`.
        shape: the shape of the constant; defaults to `()`.
        name: optional name, passed through `_prepare_name`.

    # Returns
        A Theano constant carrying the `_keras_shape` and
        `_uses_learning_phase` attributes.
    """
    resolved_dtype = floatx() if dtype is None else dtype
    resolved_shape = () if shape is None else shape

    filled = value * np.ones(resolved_shape)
    tensor = T.constant(filled,
                        dtype=resolved_dtype,
                        name=_prepare_name(name, 'constant'))
    tensor._keras_shape = resolved_shape
    tensor._uses_learning_phase = False
    return tensor
def int_shape(x):
    """Returns the static Keras shape recorded on `x`, if any.

    # Arguments
        x: Tensor or variable.

    # Returns
        The value of `x._keras_shape` (a tuple of integers or None
        entries), or `None` when `x` carries no such attribute.
    """
    return getattr(x, '_keras_shape', None)
""" if dtype is None: dtype = floatx() return variable(np.ones(shape), dtype, name) def eye(size, dtype=None, name=None): """Instantiates an identity matrix. """ if dtype is None: dtype = floatx() return variable(np.eye(size), dtype, name) def ones_like(x, dtype=None, name=None): return T.ones_like(x, dtype=dtype) def zeros_like(x, dtype=None, name=None): return T.zeros_like(x, dtype=dtype) def identity(x, name=None): """Returns a tensor with the same content as the input tensor. # Arguments x: The input tensor. name: String, name for the variable to create. # Returns A tensor of the same shape, type and content. """ return x.copy(name=name) def random_uniform_variable(shape, low, high, dtype=None, name=None): return variable(np.random.uniform(low=low, high=high, size=shape), dtype=dtype, name=name) def random_normal_variable(shape, mean, scale, dtype=None, name=None): return variable(np.random.normal(loc=0.0, scale=scale, size=shape), dtype=dtype, name=name) def count_params(x): """Returns the number of scalars in a tensor. Return: numpy integer. """ # We don't want those compilation to show up in Theano profiler. f = theano.function([], x.shape, profile=False) return np.prod(f()) def cast(x, dtype): return T.cast(x, dtype) # UPDATES OPS def update(x, new_x): return (x, new_x) def update_add(x, increment): return (x, x + increment) def update_sub(x, decrement): return (x, x - decrement) def moving_average_update(variable, value, momentum): return (variable, variable * momentum + value * (1. 
def dot(x, y):
    """Multiplies `x` by `y` and propagates the static Keras shape.

    Uses `structured_dot` from the Theano sparse module when `x` is
    sparse and `T.dot` otherwise.

    # Arguments
        x: Tensor or variable.
        y: Tensor or variable.

    # Returns
        The product tensor. When both inputs carry `_keras_shape`, the
        result's `_keras_shape` is `x`'s shape without its last axis
        followed by `y`'s shape without its second-to-last axis (or
        without its only axis when `y` is 1D).
    """
    product = (th_sparse_module.basic.structured_dot(x, y)
               if is_sparse(x) else T.dot(x, y))
    if not (hasattr(x, '_keras_shape') and hasattr(y, '_keras_shape')):
        return product

    lhs = tuple(x._keras_shape)
    rhs = tuple(y._keras_shape)
    # The contraction consumes the last axis of x ...
    lhs_kept = lhs[:-1]
    # ... and the second-to-last axis of y (its only axis when y is 1D).
    if len(rhs) <= 1:
        rhs_kept = ()
    else:
        rhs_kept = rhs[:-2] + rhs[-1:]
    product._keras_shape = lhs_kept + rhs_kept
    return product
def gather(reference, indices):
    """Retrieves the elements of indices `indices` in the tensor `reference`.

    # Arguments
        reference: A tensor.
        indices: An integer tensor of indices.

    # Returns
        The result of `reference[indices]`. When both arguments carry
        `_keras_shape`, the result's `_keras_shape` is the shape of
        `indices` followed by the shape of `reference` minus its
        first axis.
    """
    gathered = reference[indices]
    if not hasattr(reference, '_keras_shape'):
        return gathered
    if not hasattr(indices, '_keras_shape'):
        return gathered
    row_shape = reference._keras_shape[1:]
    gathered._keras_shape = indices._keras_shape + row_shape
    return gathered
""" return T.sum(x, axis=axis, keepdims=keepdims) def prod(x, axis=None, keepdims=False): """Multiply the values in a tensor, alongside the specified axis. """ return T.prod(x, axis=axis, keepdims=keepdims) def cumsum(x, axis=0): """Cumulative sum of the values in a tensor, alongside the specified axis. # Arguments x: A tensor or variable. axis: An integer, the axis to compute the sum. # Returns A tensor of the cumulative sum of values of `x` along `axis`. """ return T.extra_ops.cumsum(x, axis=axis) def cumprod(x, axis=0): """Cumulative product of the values in a tensor, alongside the specified axis. # Arguments x: A tensor or variable. axis: An integer, the axis to compute the product. # Returns A tensor of the cumulative product of values of `x` along `axis`. """ return T.extra_ops.cumprod(x, axis=axis) def mean(x, axis=None, keepdims=False): """Mean of a tensor, alongside the specified axis. """ dtype = None # bool is available since theano v0.9dev if 'int' in x.dtype or x.dtype == 'bool': dtype = floatx() return T.mean(x, axis=axis, keepdims=keepdims, dtype=dtype) def std(x, axis=None, keepdims=False): return T.std(x, axis=axis, keepdims=keepdims) def var(x, axis=None, keepdims=False): return T.var(x, axis=axis, keepdims=keepdims) def any(x, axis=None, keepdims=False): """Bitwise reduction (logical OR). """ y = T.any(x, axis=axis, keepdims=keepdims) y = _set_keras_shape_for_reduction(x, y, axis, keepdims) return y def all(x, axis=None, keepdims=False): """Bitwise reduction (logical AND). 
""" y = T.all(x, axis=axis, keepdims=keepdims) y = _set_keras_shape_for_reduction(x, y, axis, keepdims) return y def _set_keras_shape_for_reduction(x, y, axis, keepdims): if hasattr(x, '_keras_shape'): if axis is None: y._keras_shape = (1,) * len(x._keras_shape) if keepdims else (1,) else: if isinstance(axis, int): axis_list = [axis] else: axis_list = list(set(int(a) for a in axis)) keras_shape_list = list(x._keras_shape) if keepdims: for a in axis_list: keras_shape_list[a] = 1 else: for a in axis_list[::-1]: keras_shape_list.pop(a) if not keras_shape_list: keras_shape_list = (1,) y._keras_shape = tuple(keras_shape_list) return y def argmax(x, axis=-1): return T.argmax(x, axis=axis, keepdims=False) def argmin(x, axis=-1): return T.argmin(x, axis=axis, keepdims=False) def square(x): return T.sqr(x) def abs(x): return T.abs_(x) def sqrt(x): x = T.clip(x, 0., np.inf) return T.sqrt(x) def exp(x): return T.exp(x) def log(x): return T.log(x) def logsumexp(x, axis=None, keepdims=False): """Computes log(sum(exp(elements across dimensions of a tensor))). This function is more numerically stable than log(sum(exp(x))). It avoids overflows caused by taking the exp of large inputs and underflows caused by taking the log of small inputs. # Arguments x: A tensor or variable. axis: An integer, the axis to reduce over. keepdims: A boolean, whether to keep the dimensions or not. If `keepdims` is `False`, the rank of the tensor is reduced by 1. If `keepdims` is `True`, the reduced dimension is retained with length 1. # Returns The reduced tensor. 
""" # Theano has a built-in optimization for logsumexp (see https://github.com/Theano/Theano/pull/4736) # so we can just write the expression directly: return T.log(T.sum(T.exp(x), axis=axis, keepdims=keepdims)) def round(x): return T.round(x, mode='half_to_even') def sign(x): return T.sgn(x) def pow(x, a): return T.pow(x, a) def clip(x, min_value, max_value): if max_value is not None and max_value < min_value: max_value = min_value if max_value is None: max_value = np.inf return T.clip(x, min_value, max_value) def equal(x, y): return T.eq(x, y) def not_equal(x, y): z = T.neq(x, y) if hasattr(x, '_keras_shape'): z._keras_shape = x._keras_shape elif hasattr(y, '_keras_shape'): z._keras_shape = y._keras_shape return z def greater(x, y): return T.gt(x, y) def greater_equal(x, y): return T.ge(x, y) def less(x, y): return T.lt(x, y) def less_equal(x, y): return T.le(x, y) def maximum(x, y): return T.maximum(x, y) def minimum(x, y): return T.minimum(x, y) def sin(x): return T.sin(x) def cos(x): return T.cos(x) def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3): """Computes mean and std for batch then apply batch_normalization on batch. """ # TODO remove this if statement when Theano without # T.nnet.bn.batch_normalization_train is deprecated if not hasattr(T.nnet.bn, 'batch_normalization_train'): return _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon) if gamma is None: if beta is None: gamma = ones_like(x) else: gamma = ones_like(beta) if beta is None: if gamma is None: beta = zeros_like(x) beta = zeros_like(gamma) normed, mean, stdinv = T.nnet.bn.batch_normalization_train( x, gamma, beta, reduction_axes, epsilon) return normed, mean, T.inv(stdinv ** 2) def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3): """Apply batch normalization on x given mean, var, beta and gamma. 
""" # TODO remove this if statement when Theano without # T.nnet.bn.batch_normalization_test is deprecated if not hasattr(T.nnet.bn, 'batch_normalization_test'): return _old_batch_normalization(x, mean, var, beta, gamma, epsilon) if gamma is None: gamma = ones_like(var) if beta is None: beta = zeros_like(mean) if mean.ndim == 1: # based on TensorFlow's default: normalize along rightmost dimension reduction_axes = list(range(x.ndim - 1)) else: reduction_axes = [i for i in range(x.ndim) if mean.broadcastable[i]] return T.nnet.bn.batch_normalization_test( x, gamma, beta, mean, var, reduction_axes, epsilon) # TODO remove this function when Theano without # T.nnet.bn.batch_normalization_train is deprecated def _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3): # pragma: no cover """Computes mean and std for batch then apply batch_normalization on batch. """ if gamma is None: gamma = ones_like(x) if beta is None: beta = zeros_like(x) dev = theano.config.device use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu')) if use_cudnn: broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x') broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x') try: normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train( x, broadcast_gamma, broadcast_beta, 'spatial', epsilon) normed = theano.tensor.as_tensor_variable(normed) mean = theano.tensor.as_tensor_variable(mean) stdinv = theano.tensor.as_tensor_variable(stdinv) var = T.inv(stdinv ** 2) return normed, T.flatten(mean), T.flatten(var) except AttributeError: pass var = x.var(reduction_axes) mean = x.mean(reduction_axes) target_shape = [] for axis in range(ndim(x)): if axis in reduction_axes: target_shape.append(1) else: target_shape.append(x.shape[axis]) target_shape = T.stack(*target_shape) broadcast_mean = T.reshape(mean, target_shape) broadcast_var = T.reshape(var, target_shape) broadcast_beta = T.reshape(beta, target_shape) broadcast_gamma 
= T.reshape(gamma, target_shape) normed = batch_normalization(x, broadcast_mean, broadcast_var, broadcast_beta, broadcast_gamma, epsilon) return normed, mean, var # TODO remove this if statement when Theano without # T.nnet.bn.batch_normalization_test is deprecated def _old_batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3): # pragma: no cover """Apply batch normalization on x given mean, var, beta and gamma. """ if gamma is None: gamma = ones_like(var) if beta is None: beta = zeros_like(mean) if mean.ndim == 1 and x.ndim > 1: # in TensorFlow's batch_normalization, if the parameters are vectors # the batch normalization should be applied along the rightmost axis. # Theano expects the parameters to always have x.ndim dimensions. shuffle_pattern = ['x'] * (x.ndim - 1) + [0] mean = mean.dimshuffle(shuffle_pattern) var = var.dimshuffle(shuffle_pattern) beta = beta.dimshuffle(shuffle_pattern) gamma = gamma.dimshuffle(shuffle_pattern) ndim = x.ndim dev = theano.config.device use_cudnn = ndim < 5 and (dev.startswith('cuda') or dev.startswith('gpu')) if use_cudnn: try: axis = mean.broadcastable.index(False) if axis != 1: shuffle_pattern = list(range(ndim)) shuffle_pattern[1] = shuffle_pattern[axis] shuffle_pattern[axis] = 1 result = theano.sandbox.cuda.dnn.dnn_batch_normalization_test( x.dimshuffle(shuffle_pattern), gamma.dimshuffle(shuffle_pattern), beta.dimshuffle(shuffle_pattern), mean.dimshuffle(shuffle_pattern), var.dimshuffle(shuffle_pattern), 'spatial', epsilon).dimshuffle(shuffle_pattern) else: result = theano.sandbox.cuda.dnn.dnn_batch_normalization_test( x, gamma, beta, mean, var, 'spatial', epsilon) return theano.tensor.as_tensor_variable(result) except AttributeError: pass except ValueError: pass return T.nnet.bn.batch_normalization(x, gamma, beta, mean, sqrt(var + epsilon), mode='high_mem') # SHAPE OPERATIONS def concatenate(tensors, axis=-1): if py_all([is_sparse(x) for x in tensors]): axis = axis % ndim(tensors[0]) if axis == 0: output = 
def permute_dimensions(x, pattern):
    """Transpose dimensions.

    # Arguments
        x: Tensor or variable.
        pattern: Tuple or list of dimension indices, e.g. [0, 2, 1].

    # Returns
        The result of `x.dimshuffle(pattern)`. When `x` carries
        `_keras_shape`, the result's `_keras_shape` is that shape
        reordered according to `pattern`.
    """
    pattern = tuple(pattern)
    y = x.dimshuffle(pattern)
    if hasattr(x, '_keras_shape'):
        # Index the shape tuple directly. Going through a numpy array
        # turned the entries into numpy integers, which are not `int`
        # instances on Python 3 and so fail `isinstance(d, int)` checks.
        y._keras_shape = tuple(x._keras_shape[axis] for axis in pattern)
    return y
def resize_volumes(x, depth_factor, height_factor, width_factor, data_format):
    """Resize the volume contained in a 5D tensor of shape
    - [batch, channels, depth, height, width] (for 'channels_first' data_format)
    - [batch, depth, height, width, channels] (for 'channels_last' data_format)
    by a factor of (depth_factor, height_factor, width_factor).
    All three factors should be positive integers.

    # Raises
        ValueError: if `data_format` is neither `'channels_first'`
            nor `'channels_last'`.
    """
    if data_format == 'channels_first':
        first_spatial_axis = 2
    elif data_format == 'channels_last':
        first_spatial_axis = 1
    else:
        raise ValueError('Invalid data_format:', data_format)

    # Depth, height and width are consecutive axes in both layouts.
    resized = x
    factors = (depth_factor, height_factor, width_factor)
    for offset, factor in enumerate(factors):
        resized = repeat_elements(resized, factor,
                                  axis=first_spatial_axis + offset)
    return resized
""" return T.arange(start, stop=stop, step=step, dtype=dtype) def tile(x, n): y = T.tile(x, n) if hasattr(x, '_keras_shape'): if _is_explicit_shape(n): output_shape = x._keras_shape[:-len(n)] for i, j in zip(x._keras_shape, n): if i is None: output_shape += (None,) else: output_shape += (i * j,) elif isinstance(n, int): output_shape = x._keras_shape[:-1] if x._keras_shape[-1] is None: output_shape += (None,) else: output_shape += (x._keras_shape[-1] * n,) else: # symbolic n if n.ndim == 0: # n is a scalar output_shape = x._keras_shape[:-1] + (None,) elif hasattr(n, '_keras_shape'): # n is a vector n_size = n._keras_shape[0] output_shape = x._keras_shape[:-n_size] + (None,) * n_size else: output_shape = (None,) * x.ndim y._keras_shape = output_shape return y def flatten(x): y = T.flatten(x) if hasattr(x, '_keras_shape'): if None in x._keras_shape: y._keras_shape = (None,) else: y._keras_shape = (np.prod(x._keras_shape), ) return y def batch_flatten(x): """Turn a n-D tensor into a 2D tensor where the first dimension is conserved. """ y = T.reshape(x, (x.shape[0], T.prod(x.shape[1:]))) if hasattr(x, '_keras_shape'): if None in x._keras_shape[1:]: y._keras_shape = (x._keras_shape[0], None) else: y._keras_shape = (x._keras_shape[0], np.prod(x._keras_shape[1:])) return y def expand_dims(x, axis=-1): """Add a 1-sized dimension at index "dim". """ pattern = [i for i in range(x.type.ndim)] if axis < 0: if x.type.ndim == 0: axis = 0 else: axis = axis % x.type.ndim + 1 pattern.insert(axis, 'x') y = x.dimshuffle(pattern) if hasattr(x, '_keras_shape'): shape = list(x._keras_shape) shape.insert(axis, 1) y._keras_shape = tuple(shape) return y def squeeze(x, axis): """Remove a 1-dimension from the tensor at index "axis". 
""" shape = list(x.shape) shape.pop(axis) y = T.reshape(x, tuple(shape)) if hasattr(x, '_keras_shape'): kshape = list(x._keras_shape) kshape.pop(axis) y._keras_shape = tuple(kshape) return y def temporal_padding(x, padding=(1, 1)): """Pad the middle dimension of a 3D tensor with "padding" zeros left and right. Apologies for the inane API, but Theano makes this really hard. """ assert len(padding) == 2 input_shape = x.shape output_shape = (input_shape[0], input_shape[1] + padding[0] + padding[1], input_shape[2]) output = T.zeros(output_shape) result = T.set_subtensor(output[:, padding[0]:x.shape[1] + padding[0], :], x) if hasattr(x, '_keras_shape'): result._keras_shape = (x._keras_shape[0], x._keras_shape[1] + py_sum(padding), x._keras_shape[2]) return result def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None): """Pad the 2nd and 3rd dimensions of a 4D tensor with "padding[0]" and "padding[1]" (resp.) zeros left and right. """ assert len(padding) == 2 assert len(padding[0]) == 2 assert len(padding[1]) == 2 top_pad, bottom_pad = padding[0] left_pad, right_pad = padding[1] data_format = normalize_data_format(data_format) input_shape = x.shape if data_format == 'channels_first': output_shape = (input_shape[0], input_shape[1], input_shape[2] + top_pad + bottom_pad, input_shape[3] + left_pad + right_pad) output = T.zeros(output_shape) indices = (py_slice(None), py_slice(None), py_slice(top_pad, input_shape[2] + top_pad), py_slice(left_pad, input_shape[3] + left_pad)) else: output_shape = (input_shape[0], input_shape[1] + top_pad + bottom_pad, input_shape[2] + left_pad + right_pad, input_shape[3]) output = T.zeros(output_shape) indices = (py_slice(None), py_slice(top_pad, input_shape[1] + top_pad), py_slice(left_pad, input_shape[2] + left_pad), py_slice(None)) y = T.set_subtensor(output[indices], x) if hasattr(x, '_keras_shape'): if data_format == 'channels_first': if x._keras_shape[2] is not None: h = x._keras_shape[2] + top_pad + bottom_pad else: h = 
def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None):
    """Pad the 2nd, 3rd and 4th dimensions of a 5D tensor
    with "padding[0]", "padding[1]" and "padding[2]" (resp.) zeros left and right.

    For the 'channels_first' data format the padded axes are 2, 3 and 4
    instead, i.e. the three axes after the channel axis.

    # Arguments
        x: Tensor or variable, indexed along five axes.
        padding: Tuple of 3 pairs `(before, after)`, one per padded axis.
        data_format: Passed through `normalize_data_format`.

    # Returns
        The padded tensor. When `x` carries `_keras_shape`, the result's
        `_keras_shape` has the padded axes grown accordingly; axes whose
        static size is `None` stay `None`.
    """
    data_format = normalize_data_format(data_format)
    # Axes that receive padding[0], padding[1] and padding[2], in order.
    if data_format == 'channels_first':
        spatial_axes = (2, 3, 4)
    else:
        spatial_axes = (1, 2, 3)

    input_shape = x.shape
    out_dims = []
    index = []
    for axis in range(5):
        if axis in spatial_axes:
            pad = padding[spatial_axes.index(axis)]
            out_dims.append(input_shape[axis] + pad[0] + pad[1])
            index.append(py_slice(pad[0], input_shape[axis] + pad[0]))
        else:
            out_dims.append(input_shape[axis])
            index.append(py_slice(None))

    output = T.zeros(tuple(out_dims))
    # Write x into the interior of the zero tensor.
    y = T.set_subtensor(output[tuple(index)], x)

    if hasattr(x, '_keras_shape'):
        known = x._keras_shape
        padded = []
        for axis in range(5):
            dim = known[axis]
            if axis in spatial_axes and dim is not None:
                pad = padding[spatial_axes.index(axis)]
                dim = dim + pad[0] + pad[1]
            padded.append(dim)
        y._keras_shape = tuple(padded)
    return y
""" return [get_value(x) for x in xs] def set_value(x, value): x.set_value(np.asarray(value, dtype=x.dtype)) def batch_set_value(tuples): for x, value in tuples: x.set_value(np.asarray(value, dtype=x.dtype)) def get_variable_shape(x): return x.get_value(borrow=True, return_internal_type=True).shape def print_tensor(x, message=''): """Print the message and the tensor when evaluated and return the same tensor. """ p_op = Print(message) return p_op(x) # GRAPH MANIPULATION class Function(object): def __init__(self, inputs, outputs, updates=[], name=None, **kwargs): unique_variables_to_update = {} for v, nv in updates: if v not in unique_variables_to_update: unique_variables_to_update[v] = nv updates = unique_variables_to_update.items() self.function = theano.function(inputs, outputs, updates=updates, allow_input_downcast=True, on_unused_input='ignore', name=name, **kwargs) self.name = name def __call__(self, inputs): assert isinstance(inputs, (list, tuple)) return self.function(*inputs) def function(inputs, outputs, updates=[], **kwargs): if len(kwargs) > 0: for key in kwargs.keys(): if not has_arg(theano.function, key, True): msg = 'Invalid argument "%s" passed to K.function with Theano backend' % key raise ValueError(msg) return Function(inputs, outputs, updates=updates, **kwargs) def gradients(loss, variables): return T.grad(loss, variables) def stop_gradient(variables): """Returns `variables` but with zero gradient w.r.t. every other variable. # Arguments variables: tensor or list of tensors to consider constant with respect to any other variable. # Returns A single tensor or a list of tensors (depending on the passed argument) that has constant gradient with respect to any other variable. 
""" if isinstance(variables, (list, tuple)): return map(theano.gradient.disconnected_grad, variables) else: return theano.gradient.disconnected_grad(variables) # CONTROL FLOW def rnn(step_function, inputs, initial_states, go_backwards=False, mask=None, constants=None, unroll=False, input_length=None): """Iterates over the time dimension of a tensor. # Arguments step_function: Parameters: inputs: Tensor with shape (samples, ...) (no time dimension), representing input for the batch of samples at a certain time step. states: List of tensors. Returns: outputs: Tensor with shape (samples, ...) (no time dimension), new_states: List of tensors, same length and shapes as 'states'. inputs: Tensor of temporal data of shape (samples, time, ...) (at least 3D). initial_states: Tensor with shape (samples, ...) (no time dimension), containing the initial values for the states used in the step function. go_backwards: Boolean. If True, do the iteration over the time dimension in reverse order and return the reversed sequence. mask: Binary tensor with shape (samples, time), with a zero for every element that is masked. constants: A list of constant values passed at each step. unroll: Whether to unroll the RNN or to use a symbolic loop (`while_loop` or `scan` depending on backend). input_length: Static number of timesteps in the input. Must be specified if using `unroll`. # Returns A tuple (last_output, outputs, new_states). last_output: The latest output of the rnn, of shape `(samples, ...)` outputs: Tensor with shape `(samples, time, ...)` where each entry `outputs[s, t]` is the output of the step function at time `t` for sample `s`. new_states: List of tensors, latest states returned by the step function, of shape `(samples, ...)`. """ ndim = inputs.ndim assert ndim >= 3, 'Input should be at least 3D.' 
if unroll: if input_length is None: raise ValueError('When specifying `unroll=True`, ' 'an `input_length` ' 'must be provided to `rnn`.') axes = [1, 0] + list(range(2, ndim)) inputs = inputs.dimshuffle(axes) if constants is None: constants = [] global uses_learning_phase uses_learning_phase = False if mask is not None: if mask.ndim == ndim - 1: mask = expand_dims(mask) assert mask.ndim == ndim mask = mask.dimshuffle(axes) if unroll: indices = list(range(input_length)) if go_backwards: indices = indices[::-1] successive_outputs = [] successive_states = [] states = initial_states for i in indices: output, new_states = step_function(inputs[i], states + constants) if getattr(output, '_uses_learning_phase', False): uses_learning_phase = True if len(successive_outputs) == 0: prev_output = zeros_like(output) else: prev_output = successive_outputs[-1] output = T.switch(mask[i], output, prev_output) kept_states = [] for state, new_state in zip(states, new_states): kept_states.append(T.switch(mask[i], new_state, state)) states = kept_states successive_outputs.append(output) successive_states.append(states) outputs = T.stack(*successive_outputs) states = [] for i in range(len(successive_states[-1])): states.append(T.stack(*[states_at_step[i] for states_at_step in successive_states])) else: # build an all-zero tensor of shape (samples, output_dim) initial_output = step_function(inputs[0], initial_states + constants)[0] * 0 # Theano gets confused by broadcasting patterns in the scan op initial_output = T.unbroadcast(initial_output, 0, 1) if len(initial_states) > 0: initial_states[0] = T.unbroadcast(initial_states[0], 0, 1) def _step(inputs, mask, output_tm1, *states): outputs, new_states = step_function(inputs, states) if getattr(outputs, '_uses_learning_phase', False): global uses_learning_phase uses_learning_phase = True # output previous output if masked. 
outputs = T.switch(mask, outputs, output_tm1) return_states = [] for state, new_state in zip(states, new_states): return_states.append(T.switch(mask, new_state, state)) return [outputs] + return_states results, _ = theano.scan( _step, sequences=[inputs, mask], outputs_info=[initial_output] + initial_states, non_sequences=constants, go_backwards=go_backwards) # deal with Theano API inconsistency if isinstance(results, list): outputs = results[0] states = results[1:] else: outputs = results states = [] else: if unroll: indices = list(range(input_length)) if go_backwards: indices = indices[::-1] successive_outputs = [] successive_states = [] states = initial_states for i in indices: outputs, states = step_function(inputs[i], states + constants) if getattr(outputs, '_uses_learning_phase', False): uses_learning_phase = True successive_outputs.append(outputs) successive_states.append(states) outputs = T.stack(*successive_outputs) states = [] for i in range(len(successive_states[-1])): states.append(T.stack(*[states_at_step[i] for states_at_step in successive_states])) else: def _step(inputs, *states): outputs, new_states = step_function(inputs, states) if getattr(outputs, '_uses_learning_phase', False): global uses_learning_phase uses_learning_phase = True return [outputs] + new_states # Theano likes to make shape==1 dimensions # in the initial states (outputs_info) broadcastable if len(initial_states) > 0: initial_states[0] = T.unbroadcast(initial_states[0], 0, 1) results, _ = theano.scan( _step, sequences=inputs, outputs_info=[None] + initial_states, non_sequences=constants, go_backwards=go_backwards) # deal with Theano API inconsistency if isinstance(results, list): outputs = results[0] states = results[1:] else: outputs = results states = [] outputs = T.squeeze(outputs) last_output = outputs[-1] axes = [1, 0] + list(range(2, outputs.ndim)) outputs = outputs.dimshuffle(axes) states = [T.squeeze(state[-1]) for state in states] last_output._uses_learning_phase = 
uses_learning_phase return last_output, outputs, states def switch(condition, then_expression, else_expression): """Switches between two operations depending on a scalar value. Note that both `then_expression` and `else_expression` should be symbolic tensors of the *same shape*. # Arguments condition: scalar tensor (`int` or `bool`). then_expression: either a tensor, or a callable that returns a tensor. else_expression: either a tensor, or a callable that returns a tensor. # Returns The selected tensor. """ if callable(then_expression): then_expression = then_expression() if callable(else_expression): else_expression = else_expression() cond_ndim = ndim(condition) expr_ndim = ndim(then_expression) if cond_ndim < expr_ndim: ndim_diff = expr_ndim - cond_ndim for _ in range(ndim_diff): condition = expand_dims(condition) return T.switch(condition, then_expression, else_expression) def in_train_phase(x, alt, training=None): """Selects `x` in train phase, and `alt` otherwise. Note that `alt` should have the *same shape* as `x`. # Returns Either `x` or `alt` based on the `training` flag. the `training` flag defaults to `K.learning_phase()`. """ if training is None: training = learning_phase() uses_learning_phase = True else: uses_learning_phase = False if training is 1 or training is True: if callable(x): return x() else: return x elif training is 0 or training is False: if callable(alt): return alt() else: return alt if callable(x): x = x() if callable(alt): alt = alt() # else: assume learning phase is a placeholder tensor. x = ifelse(training, x, alt) if uses_learning_phase: x._uses_learning_phase = True return x def in_test_phase(x, alt, training=None): """Selects `x` in test phase, and `alt` otherwise. Note that `alt` should have the *same shape* as `x`. # Returns Either `x` or `alt` based on `K.learning_phase`. 
""" return in_train_phase(alt, x, training=training) # NN OPERATIONS def _assert_has_capability(module, func): if not hasattr(module, func): raise EnvironmentError( 'It looks like like your version of ' 'Theano is out of date. ' 'Install the latest version with:\n' 'pip install git+git://github.com/Theano/Theano.git ' '--upgrade --no-deps') def elu(x, alpha=1.0): """ Exponential linear unit # Arguments x: Tensor to compute the activation function for. alpha: scalar """ _assert_has_capability(T.nnet, 'elu') return T.nnet.elu(x, alpha) def relu(x, alpha=0., max_value=None, threshold=0.): _assert_has_capability(T.nnet, 'relu') if alpha != 0.: if threshold != 0.: negative_part = T.nnet.relu(-x + threshold) else: negative_part = T.nnet.relu(-x) if threshold != 0.: x = x * T.cast(T.gt(x, threshold), floatx()) else: x = T.nnet.relu(x) if max_value is not None: x = T.clip(x, 0.0, max_value) if alpha != 0.: x -= alpha * negative_part return x def softmax(x, axis=-1): if (axis == -1 or axis == x.ndim - 1) and x.ndim == 2: return T.nnet.softmax(x) xm = x.max(axis=axis, keepdims=True) return T.exp(x - xm) / T.exp( x - xm).sum(axis=axis, keepdims=True) def softplus(x): return T.nnet.softplus(x) def softsign(x): return T_softsign(x) def categorical_crossentropy(target, output, from_logits=False, axis=-1): output_dimensions = list(range(len(int_shape(output)))) if axis != -1 and axis not in output_dimensions: raise ValueError( '{}{}{}'.format( 'Unexpected channels axis {}. 
'.format(axis), 'Expected to be -1 or one of the axes of `output`, ', 'which has {} dimensions.'.format(len(int_shape(output))))) # If the channels are not in the last axis, move them to be there: if axis != -1 and axis != output_dimensions[-1]: permutation = output_dimensions[:axis] permutation += output_dimensions[axis + 1:] + [axis] output = permute_dimensions(output, permutation) target = permute_dimensions(target, permutation) if from_logits: output = T.nnet.softmax(output) else: # scale preds so that the class probas of each sample sum to 1 output /= output.sum(axis=-1, keepdims=True) # avoid numerical instability with _EPSILON clipping output = T.clip(output, epsilon(), 1.0 - epsilon()) return T.nnet.categorical_crossentropy(output, target) def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1): output_dimensions = list(range(len(int_shape(output)))) if axis != -1 and axis not in output_dimensions: raise ValueError( '{}{}{}'.format( 'Unexpected channels axis {}. 
'.format(axis), 'Expected to be -1 or one of the axes of `output`, ', 'which has {} dimensions.'.format(len(int_shape(output))))) # If the channels are not in the last axis, move them to be there: if axis != -1 and axis != output_dimensions[-1]: permutation = output_dimensions[:axis] permutation += output_dimensions[axis + 1:] + [axis] output = permute_dimensions(output, permutation) target = permute_dimensions(target, permutation) target = T.cast(T.flatten(target), 'int32') target = T.extra_ops.to_one_hot(target, nb_class=output.shape[-1]) target = reshape(target, shape(output)) return categorical_crossentropy(target, output, from_logits, axis=-1) def binary_crossentropy(target, output, from_logits=False): if from_logits: output = T.nnet.sigmoid(output) # avoid numerical instability with _EPSILON clipping output = T.clip(output, epsilon(), 1.0 - epsilon()) return T.nnet.binary_crossentropy(output, target) def sigmoid(x): return T.nnet.sigmoid(x) def hard_sigmoid(x): return T.nnet.hard_sigmoid(x) def tanh(x): return T.tanh(x) def dropout(x, level, noise_shape=None, seed=None): """Sets entries in `x` to zero at random, while scaling the entire tensor. # Arguments x: tensor level: fraction of the entries in the tensor that will be set to 0. noise_shape: shape for randomly generated keep/drop flags, must be broadcastable to the shape of `x` seed: random seed to ensure determinism. """ if level < 0. or level >= 1: raise ValueError('Dropout level must be in interval [0, 1[.') if seed is None: seed = np.random.randint(1, 10e6) if isinstance(noise_shape, list): noise_shape = tuple(noise_shape) rng = RandomStreams(seed=seed) retain_prob = 1. 
- level if noise_shape is None: random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype) else: random_tensor = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype) random_tensor = T.patternbroadcast(random_tensor, [dim == 1 for dim in noise_shape]) x *= random_tensor x /= retain_prob return x def l2_normalize(x, axis=None): square_sum = T.sum(T.square(x), axis=axis, keepdims=True) norm = T.sqrt(T.maximum(square_sum, epsilon())) return x / norm def in_top_k(predictions, targets, k): """Returns whether the `targets` are in the top `k` `predictions`. # Arguments predictions: A tensor of shape `(batch_size, classes)` and type `float32`. targets: A 1D tensor of length `batch_size` and type `int32` or `int64`. k: An `int`, number of top elements to consider. # Returns A 1D tensor of length `batch_size` and type `bool`. `output[i]` is `True` if `predictions[i, targets[i]]` is within top-`k` values of `predictions[i]`. """ # handle k < 1 and k >= predictions.shape[1] cases to match TF behavior if k < 1: # dtype='bool' is only available since Theano 0.9.0 try: return T.zeros_like(targets, dtype='bool') except TypeError: return T.zeros_like(targets, dtype='int8') if k >= int_shape(predictions)[1]: try: return T.ones_like(targets, dtype='bool') except TypeError: return T.ones_like(targets, dtype='int8') predictions_k = T.sort(predictions)[:, -k] targets_values = predictions[T.arange(targets.shape[0]), targets] return T.ge(targets_values, predictions_k) # CONVOLUTIONS def _preprocess_conv2d_input(x, data_format): if data_format == 'channels_last': # TF uses the last dimension as channel dimension, # instead of the 2nd one. # TH input shape: (samples, input_depth, rows, cols) # TF input shape: (samples, rows, cols, input_depth) x = x.dimshuffle((0, 3, 1, 2)) return x def _preprocess_conv3d_input(x, data_format): if data_format == 'channels_last': # TF uses the last dimension as channel dimension, # instead of the 2nd one. 
# TH input shape: (samples, input_depth, rows, cols, slices) # TF input shape: (samples, rows, cols, slices, input_depth) x = x.dimshuffle((0, 4, 1, 2, 3)) return x def _preprocess_conv2d_kernel(kernel, data_format): # As of Keras 2.0.0, all kernels are normalized # on the format `(rows, cols, input_depth, depth)`, # independently of `data_format`. # Theano expects `(depth, input_depth, rows, cols)`. kernel = kernel.dimshuffle((3, 2, 0, 1)) return kernel def _preprocess_conv2d_depthwise_kernel(kernel, kernel_shape, data_format): # As of Keras 2.0.0, all kernels are normalized # on the format `(rows, cols, input_depth, depth)`, # independently of `data_format`. # Theano expects `(input_depth * depth, 1, rows, cols)` for depthwise convolution. kernel = kernel[::-1, ::-1, :, :] kernel = kernel.dimshuffle((2, 3, 0, 1)) kernel = reshape(kernel, kernel_shape) return kernel def _preprocess_conv3d_kernel(kernel, data_format): # As of Keras 2.0.0, all kernels are normalized # on the format `(space, input_depth, depth)`, # independently of `data_format`. # Theano expects `(depth, input_depth, space)`. 
kernel = kernel.dimshuffle((4, 3, 0, 1, 2)) return kernel def _preprocess_padding(padding): if padding == 'same': th_padding = 'half' elif padding == 'valid': th_padding = 'valid' elif padding == 'full': th_padding = 'full' else: raise ValueError('Border mode not supported:', str(padding)) return th_padding def _preprocess_conv2d_image_shape(image_shape, data_format): # Theano might not accept long type def int_or_none(value): try: return int(value) except TypeError: return None if data_format == 'channels_last': if image_shape: image_shape = transpose_shape(image_shape, 'channels_first', spatial_axes=(1, 2)) if image_shape is not None: image_shape = tuple(int_or_none(v) for v in image_shape) return image_shape def _preprocess_conv3d_volume_shape(volume_shape, data_format): # Theano might not accept long type def int_or_none(value): try: return int(value) except TypeError: return None if data_format == 'channels_last': if volume_shape: volume_shape = (volume_shape[0], volume_shape[4], volume_shape[1], volume_shape[2], volume_shape[3]) if volume_shape is not None: volume_shape = tuple(int_or_none(v) for v in volume_shape) return volume_shape def _preprocess_conv2d_filter_shape(filter_shape, data_format): # Theano might not accept long type def int_or_none(value): try: return int(value) except TypeError: return None if filter_shape: filter_shape = (filter_shape[3], filter_shape[2], filter_shape[0], filter_shape[1]) if filter_shape is not None: filter_shape = tuple(int_or_none(v) for v in filter_shape) return filter_shape def _preprocess_conv2d_depthwise_filter_shape(filter_shape, data_format): # Theano might not accept long type def int_or_none(value): try: return int(value) except TypeError: return None if filter_shape: filter_shape = (filter_shape[3] * filter_shape[2], 1, filter_shape[0], filter_shape[1]) if filter_shape is not None: filter_shape = tuple(int_or_none(v) for v in filter_shape) return filter_shape def _preprocess_conv3d_filter_shape(filter_shape, 
data_format): # Theano might not accept long type def int_or_none(value): try: return int(value) except TypeError: return None if filter_shape: filter_shape = (filter_shape[4], filter_shape[3], filter_shape[0], filter_shape[1], filter_shape[2]) if filter_shape is not None: filter_shape = tuple(int_or_none(v) for v in filter_shape) return filter_shape def _postprocess_conv2d_output(conv_out, x, padding, kernel_shape, strides, data_format): if padding == 'same': if kernel_shape[2] % 2 == 0: conv_out = conv_out[:, :, :(x.shape[2] + strides[0] - 1) // strides[0], :] if kernel_shape[3] % 2 == 0: conv_out = conv_out[:, :, :, :(x.shape[3] + strides[1] - 1) // strides[1]] if data_format == 'channels_last': conv_out = conv_out.dimshuffle((0, 2, 3, 1)) return conv_out def _postprocess_conv3d_output(conv_out, x, padding, kernel_shape, strides, data_format): if padding == 'same': if kernel_shape[2] % 2 == 0: conv_out = conv_out[:, :, :(x.shape[2] + strides[0] - 1) // strides[0], :, :] if kernel_shape[3] % 2 == 0: conv_out = conv_out[:, :, :, :(x.shape[3] + strides[1] - 1) // strides[1], :] if kernel_shape[4] % 2 == 0: conv_out = conv_out[:, :, :, :, :(x.shape[4] + strides[2] - 1) // strides[2]] if data_format == 'channels_last': conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1)) return conv_out def conv1d(x, kernel, strides=1, padding='valid', data_format=None, dilation_rate=1): """1D convolution. # Arguments kernel: kernel tensor. strides: stride integer. padding: string, `"same"`, `"causal"` or `"valid"`. data_format: string, one of "channels_last", "channels_first" dilation_rate: integer. 
""" data_format = normalize_data_format(data_format) kernel_shape = int_shape(kernel) if padding == 'causal': # causal (dilated) convolution: if not kernel_shape: raise AttributeError('Causal padding requires kernel._keras_shape set.') left_pad = dilation_rate * (kernel_shape[0] - 1) x = temporal_padding(x, (left_pad, 0)) padding = 'valid' shape = int_shape(x) if data_format == 'channels_last': # original shape: (batch, length, input_dim) # add dim to x to have (batch, length, 1, input_dim) x = expand_dims(x, 2) # update x._keras_shape if shape is not None: x._keras_shape = (shape[0], shape[1], 1, shape[2]) else: # original shape: (batch, input_dim, length) # add dim to x to have (batch, input_dim, length, 1) x = expand_dims(x, 3) # update x._keras_shape if shape is not None: x._keras_shape = (shape[0], shape[1], shape[2], 1) # update dilation rate, strides dilation_rate = (dilation_rate, 1) strides = (strides, 1) # add dim to kernel (always same format independently of data_format) # i.e. (rows, 1, input_depth, depth) kernel = expand_dims(kernel, 1) output = conv2d(x, kernel, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate) # remove added dim if data_format == 'channels_last': output = squeeze(output, 2) else: output = squeeze(output, 3) return output def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1)): """2D convolution. # Arguments kernel: kernel tensor. strides: strides tuple. padding: string, "same" or "valid". data_format: "channels_last" or "channels_first". Whether to use Theano or TensorFlow data format in inputs/kernels/outputs. 
""" data_format = normalize_data_format(data_format) image_shape = _preprocess_conv2d_image_shape(int_shape(x), data_format) kernel_shape = int_shape(kernel) if kernel_shape is None: kernel_shape = kernel.eval().shape # in case of a shared variable kernel_shape = _preprocess_conv2d_filter_shape(kernel_shape, data_format) x = _preprocess_conv2d_input(x, data_format) kernel = _preprocess_conv2d_kernel(kernel, data_format) th_padding = _preprocess_padding(padding) conv_out = T.nnet.conv2d(x, kernel, border_mode=th_padding, subsample=strides, input_shape=image_shape, filter_shape=kernel_shape, filter_dilation=dilation_rate) conv_out = _postprocess_conv2d_output(conv_out, x, padding, kernel_shape, strides, data_format) return conv_out def conv2d_transpose(x, kernel, output_shape, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1)): """2D deconvolution (transposed convolution). # Arguments kernel: kernel tensor. output_shape: desired dimensions of output. strides: strides tuple. padding: string, "same" or "valid". data_format: "channels_last" or "channels_first". Whether to use Theano or TensorFlow data format in inputs/kernels/outputs. dilation_rate: tuple of 2 integers. # Raises ValueError: if using an even kernel size with padding 'same'. """ flip_filters = False data_format = normalize_data_format(data_format) if data_format == 'channels_last': output_shape = (output_shape[0], output_shape[3], output_shape[1], output_shape[2]) kernel_shape = int_shape(kernel) if kernel_shape is None: kernel_shape = kernel.eval().shape # in case of a shared variable if padding == 'same' and kernel_shape[0] % 2 == 0: raise ValueError('In `Conv2DTranspose`, with padding mode `same`, ' 'even kernel sizes are not supported with Theano. 
' 'You can set `kernel_size` to an odd number.') kernel_shape = _preprocess_conv2d_filter_shape(kernel_shape, data_format) x = _preprocess_conv2d_input(x, data_format) kernel = _preprocess_conv2d_kernel(kernel, data_format) th_padding = _preprocess_padding(padding) op = T.nnet.abstract_conv.AbstractConv2d_gradInputs(imshp=None, kshp=kernel_shape, subsample=strides, border_mode=th_padding, filter_flip=not flip_filters, filter_dilation=dilation_rate) conv_out = op(kernel, x, output_shape[2:]) conv_out = _postprocess_conv2d_output(conv_out, x, padding, kernel_shape, strides, data_format) return conv_out def separable_conv1d(x, depthwise_kernel, pointwise_kernel, strides=1, padding='valid', data_format=None, dilation_rate=1): """1D convolution with separable filters. # Arguments x: input tensor depthwise_kernel: convolution kernel for the depthwise convolution. pointwise_kernel: kernel for the 1x1 convolution. strides: strides integer. padding: string, `"same"` or `"valid"`. data_format: string, `"channels_last"` or `"channels_first"`. dilation_rate: integer dilation rate. # Returns Output tensor. # Raises ValueError: if `data_format` is neither `"channels_last"` or `"channels_first"`. 
""" data_format = normalize_data_format(data_format) if isinstance(strides, int): strides = (strides,) if isinstance(dilation_rate, int): dilation_rate = (dilation_rate,) if data_format == 'channels_last': spatial_start_dim = 2 else: spatial_start_dim = 3 x = expand_dims(x, spatial_start_dim) depthwise_kernel = expand_dims(depthwise_kernel, 1) pointwise_kernel = expand_dims(pointwise_kernel, 1) strides = strides + (1,) dilation_rate = dilation_rate + (1,) image_shape = _preprocess_conv2d_image_shape(int_shape(x), data_format) depthwise_kernel_shape = int_shape(depthwise_kernel) if depthwise_kernel_shape is None: depthwise_kernel_shape = depthwise_kernel.eval().shape # in case of a shared variable depthwise_kernel_shape = _preprocess_conv2d_depthwise_filter_shape(depthwise_kernel_shape, data_format) pointwise_kernel_shape = int_shape(pointwise_kernel) if pointwise_kernel_shape is None: pointwise_kernel_shape = pointwise_kernel.eval().shape # in case of a shared variable pointwise_kernel_shape = _preprocess_conv2d_filter_shape(pointwise_kernel_shape, data_format) x = _preprocess_conv2d_input(x, data_format) depthwise_kernel = _preprocess_conv2d_depthwise_kernel(depthwise_kernel, depthwise_kernel_shape, data_format) pointwise_kernel = _preprocess_conv2d_kernel(pointwise_kernel, data_format) th_padding = _preprocess_padding(padding) conv_out = T.nnet.conv2d(x, depthwise_kernel, border_mode=th_padding, subsample=strides, input_shape=image_shape, filter_shape=depthwise_kernel_shape, filter_dilation=dilation_rate, num_groups=image_shape[1]) conv_out = T.nnet.conv2d(conv_out, pointwise_kernel, border_mode=th_padding, subsample=(1, 1), input_shape=None, filter_shape=pointwise_kernel_shape, filter_dilation=dilation_rate) conv_out = _postprocess_conv2d_output(conv_out, x, padding, pointwise_kernel_shape, strides, data_format) conv_out = squeeze(conv_out, spatial_start_dim) return conv_out def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1), 
def depthwise_conv2d(x, depthwise_kernel, strides=(1, 1), padding='valid',
                     data_format=None, dilation_rate=(1, 1)):
    """2D depthwise convolution (grouped convolution, one group per
    input channel).

    # Arguments
        x: input tensor
        depthwise_kernel: convolution kernel for the depthwise convolution.
        strides: strides tuple (length 2).
        padding: string, `"same"` or `"valid"`.
        data_format: string, `"channels_last"` or `"channels_first"`.
        dilation_rate: tuple of integers,
            dilation rates for the depthwise convolution.

    # Returns
        Output tensor.

    # Raises
        ValueError: if `data_format` is neither `"channels_last"` or `"channels_first"`.
    """
    data_format = normalize_data_format(data_format)

    image_shape = _preprocess_conv2d_image_shape(int_shape(x), data_format)
    depthwise_kernel_shape = int_shape(depthwise_kernel)
    if depthwise_kernel_shape is None:
        depthwise_kernel_shape = depthwise_kernel.eval().shape  # in case of a shared variable
    depthwise_kernel_shape = _preprocess_conv2d_depthwise_filter_shape(depthwise_kernel_shape, data_format)

    x = _preprocess_conv2d_input(x, data_format)
    depthwise_kernel = _preprocess_conv2d_depthwise_kernel(depthwise_kernel, depthwise_kernel_shape, data_format)
    th_padding = _preprocess_padding(padding)

    # `num_groups` is the channel count read from the static,
    # channels-first input shape.
    conv_out = T.nnet.conv2d(x, depthwise_kernel,
                             border_mode=th_padding,
                             subsample=strides,
                             input_shape=image_shape,
                             filter_shape=depthwise_kernel_shape,
                             filter_dilation=dilation_rate,
                             num_groups=image_shape[1])
    conv_out = _postprocess_conv2d_output(conv_out, x, padding,
                                          depthwise_kernel_shape, strides, data_format)
    return conv_out


def conv3d(x, kernel, strides=(1, 1, 1),
           padding='valid', data_format=None,
           dilation_rate=(1, 1, 1)):
    """3D convolution.

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        strides: strides tuple.
        padding: string, "same" or "valid".
        data_format: "channels_last" or "channels_first".
            Whether to use Theano or TensorFlow data format
            in inputs/kernels/outputs.
        dilation_rate: tuple of 3 integers, passed to Theano
            as `filter_dilation`.

    # Returns
        The convolution output, in the layout given by `data_format`.
    """
    data_format = normalize_data_format(data_format)

    volume_shape = _preprocess_conv3d_volume_shape(int_shape(x), data_format)
    kernel_shape = int_shape(kernel)
    if kernel_shape is None:
        kernel_shape = kernel.eval().shape  # in case of a shared variable
    kernel_shape = _preprocess_conv3d_filter_shape(kernel_shape, data_format)

    x = _preprocess_conv3d_input(x, data_format)
    kernel = _preprocess_conv3d_kernel(kernel, data_format)
    th_padding = _preprocess_padding(padding)

    conv_out = T.nnet.conv3d(x, kernel,
                             border_mode=th_padding,
                             subsample=strides,
                             input_shape=volume_shape,
                             filter_shape=kernel_shape,
                             filter_dilation=dilation_rate)
    # `x` is the already re-laid-out input here; its shape drives the crop.
    conv_out = _postprocess_conv3d_output(conv_out, x, padding,
                                          kernel_shape, strides, data_format)
    return conv_out
' 'You can set `kernel_size` to an odd number.') kernel_shape = _preprocess_conv3d_filter_shape(kernel_shape, data_format) x = _preprocess_conv3d_input(x, data_format) kernel = _preprocess_conv3d_kernel(kernel, data_format) th_padding = _preprocess_padding(padding) op = T.nnet.abstract_conv.AbstractConv3d_gradInputs(imshp=None, kshp=kernel_shape, subsample=strides, border_mode=th_padding, filter_flip=not flip_filters) conv_out = op(kernel, x, output_shape[2:]) conv_out = _postprocess_conv3d_output(conv_out, x, padding, kernel_shape, strides, data_format) return conv_out def pool2d(x, pool_size, strides=(1, 1), padding='valid', data_format=None, pool_mode='max'): data_format = normalize_data_format(data_format) assert pool_size[0] >= 1 and pool_size[1] >= 1 if padding == 'same': w_pad = pool_size[0] - 2 if pool_size[0] > 2 and pool_size[0] % 2 == 1 else pool_size[0] - 1 h_pad = pool_size[1] - 2 if pool_size[1] > 2 and pool_size[1] % 2 == 1 else pool_size[1] - 1 pad = (w_pad, h_pad) elif padding == 'valid': pad = (0, 0) else: raise ValueError('Invalid border mode:', padding) if data_format == 'channels_last': x = x.dimshuffle((0, 3, 1, 2)) if pool_mode == 'max': pool_out = pool.pool_2d(x, ws=pool_size, stride=strides, ignore_border=True, pad=pad, mode='max') elif pool_mode == 'avg': pool_out = pool.pool_2d(x, ws=pool_size, stride=strides, ignore_border=True, pad=pad, mode='average_exc_pad') else: raise ValueError('Invalid pooling mode:', pool_mode) if padding == 'same': expected_width = (x.shape[2] + strides[0] - 1) // strides[0] expected_height = (x.shape[3] + strides[1] - 1) // strides[1] pool_out = pool_out[:, :, : expected_width, : expected_height] if data_format == 'channels_last': pool_out = pool_out.dimshuffle((0, 2, 3, 1)) return pool_out def pool3d(x, pool_size, strides=(1, 1, 1), padding='valid', data_format=None, pool_mode='max'): data_format = normalize_data_format(data_format) if padding == 'same': w_pad = pool_size[0] - 2 if pool_size[0] % 2 == 1 else 
pool_size[0] - 1 h_pad = pool_size[1] - 2 if pool_size[1] % 2 == 1 else pool_size[1] - 1 d_pad = pool_size[2] - 2 if pool_size[2] % 2 == 1 else pool_size[2] - 1 pad = (w_pad, h_pad, d_pad) elif padding == 'valid': pad = (0, 0, 0) else: raise ValueError('Invalid padding:', padding) if data_format == 'channels_last': x = x.dimshuffle((0, 4, 1, 2, 3)) if pool_mode == 'max': pool_out = pool.pool_3d(x, ws=pool_size, stride=strides, ignore_border=True, pad=pad, mode='max') elif pool_mode == 'avg': pool_out = pool.pool_3d(x, ws=pool_size, stride=strides, ignore_border=True, pad=pad, mode='average_exc_pad') else: raise ValueError('Invalid pooling mode:', pool_mode) if padding == 'same': expected_width = (x.shape[2] + strides[0] - 1) // strides[0] expected_height = (x.shape[3] + strides[1] - 1) // strides[1] expected_depth = (x.shape[4] + strides[2] - 1) // strides[2] pool_out = pool_out[:, :, : expected_width, : expected_height, : expected_depth] if data_format == 'channels_last': pool_out = pool_out.dimshuffle((0, 2, 3, 4, 1)) return pool_out def bias_add(x, bias, data_format=None): data_format = normalize_data_format(data_format) if ndim(bias) != 1 and ndim(bias) != ndim(x) - 1: raise ValueError('Unexpected bias dimensions %d, ' 'expect to be 1 or %d dimensions' % (ndim(bias), ndim(x) - 1)) bias_shape = tuple(bias.shape) if ndim(x) == 5: if data_format == 'channels_first': if ndim(bias) == 1: x += reshape(bias, (1, bias_shape[0], 1, 1, 1)) else: x += reshape(bias, (1, bias_shape[3]) + bias_shape[:3]) elif data_format == 'channels_last': if ndim(bias) == 1: x += reshape(bias, (1, 1, 1, 1, bias_shape[0])) else: x += reshape(bias, (1,) + bias_shape) elif ndim(x) == 4: if data_format == 'channels_first': if ndim(bias) == 1: x += reshape(bias, (1, bias_shape[0], 1, 1)) else: x += reshape(bias, (1, bias_shape[2]) + bias_shape[:2]) elif data_format == 'channels_last': if ndim(bias) == 1: x += reshape(bias, (1, 1, 1, bias_shape[0])) else: x += reshape(bias, (1,) + bias_shape) 
# RANDOMNESS


def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
    """Return a tensor of normally distributed values.

    `dtype` defaults to `floatx()`; when `seed` is None one is drawn
    with `np.random.randint`.
    """
    if dtype is None:
        dtype = floatx()
    if seed is None:
        seed = np.random.randint(1, 10e6)
    rng = RandomStreams(seed=seed)
    return rng.normal(size=shape, avg=mean, std=stddev, dtype=dtype)


def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None):
    """Return a tensor of uniform values between `minval` and `maxval`.

    `dtype` defaults to `floatx()`; when `seed` is None one is drawn
    with `np.random.randint`.
    """
    if dtype is None:
        dtype = floatx()
    if seed is None:
        seed = np.random.randint(1, 10e6)
    rng = RandomStreams(seed=seed)
    return rng.uniform(shape, low=minval, high=maxval, dtype=dtype)


def random_binomial(shape, p=0.0, dtype=None, seed=None):
    """Return a tensor of binomial draws with success probability `p`.

    `dtype` defaults to `floatx()`; when `seed` is None one is drawn
    with `np.random.randint`.
    """
    if dtype is None:
        dtype = floatx()
    if seed is None:
        seed = np.random.randint(1, 10e6)
    rng = RandomStreams(seed=seed)
    return rng.binomial(shape, p=p, dtype=dtype)


def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
    """Return a tensor of truncated normal values.

    Tries `rng.normal(..., truncate=True)`; if that keyword is rejected
    with a TypeError, falls back to clipping an ordinary normal sample
    to `mean +/- 2 * stddev`.
    """
    if dtype is None:
        dtype = floatx()
    if seed is None:
        seed = np.random.randint(1, 10e6)
    rng = RandomStreams(seed=seed)

    try:
        return rng.normal(size=shape, avg=mean, std=stddev, dtype=dtype,
                          truncate=True)
    except TypeError:
        normal_t = rng.normal(size=shape, avg=mean, std=stddev, dtype=dtype)
        # Poor man's truncated normal: we literally clip the tensor
        return T.clip(normal_t, mean - 2 * stddev, mean + 2 * stddev)


# Theano implementation of CTC
# Used with permission from Shawn Tan
# https://github.com/shawntan/
# Note that TensorFlow's native CTC code is significantly
# faster than this


def ctc_interleave_blanks(Y):
    """Return a vector of length `2 * len(Y) + 1` that holds `Y` at the
    odd positions and -1 everywhere else.

    The -1 entries are later used as a column index into the predictions
    (see `ctc_path_probs`), i.e. they select the last column.
    """
    Y_ = T.alloc(-1, Y.shape[0] * 2 + 1)
    Y_ = T.set_subtensor(Y_[T.arange(Y.shape[0]) * 2 + 1], Y)
    return Y_


def ctc_create_skip_idxs(Y):
    """Return the odd positions `i` of `Y` for which `Y[i] != Y[i + 2]`.

    # presumably these are the label positions from which the following
    # blank may be skipped -- confirm against the CTC formulation.
    """
    skip_idxs = T.arange((Y.shape[0] - 3) // 2) * 2 + 1
    non_repeats = T.neq(Y[skip_idxs], Y[skip_idxs + 2])
    return skip_idxs[non_repeats.nonzero()]


def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    """Advance the log-probability vector by one time step.

    # Arguments
        skip_idxs: positions returned by `ctc_create_skip_idxs`.
        zeros: zero vector used as the base of the results.
        active: number of leading positions currently in use.
        log_p_curr: log-probabilities of the current time step.
        log_p_prev: log-probability vector from the previous step.

    # Returns
        A tuple `(active_next, log_p_next)`.
    """
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    # Grow the active range by one, or up to two past the furthest active
    # skip position, capped at the vector length. The appended -1 keeps
    # T.max defined when there is no active skip index.
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    # Leave log space after subtracting the maximum; it is added back below.
    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next


def ctc_path_probs(predict, Y, alpha=1e-4):
    """Run the update of `ctc_update_log_p` forwards and backwards over
    the prediction sequence.

    # Arguments
        predict: predictions, indexed as `predict[:, Y]`.
        Y: label vector (with interleaved blanks when called from `ctc_cost`).
        alpha: smoothing weight mixed into the selected predictions.

    # Returns
        A tuple `(log_probs, mask)`.
    """
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
    b_skip_idxs = ctc_create_skip_idxs(Y[::-1])  # there should be a shortcut to calculating this

    def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev):
        f_active_next, log_f_next = ctc_update_log_p(f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev)
        b_active_next, log_b_next = ctc_update_log_p(b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev)
        return f_active_next, log_f_next, b_active_next, log_b_next

    # The backward pass scans `L` reversed along both axes.
    [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan(
        step, sequences=[L, L[::-1, ::-1]], outputs_info=[np.int32(1), log_first, np.int32(1), log_first])

    idxs = T.arange(L.shape[1]).dimshuffle('x', 0)
    mask = (idxs < f_active.dimshuffle(0, 'x')) & (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    # `L` is subtracted once because it is contained in both passes.
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask


def ctc_cost(predict, Y):
    """Negative total log-probability for one sample.

    Sums the masked entries of `ctc_path_probs` in a max-shifted
    (log-sum-exp) fashion.
    """
    log_probs, mask = ctc_path_probs(predict, ctc_interleave_blanks(Y))
    common_factor = T.max(log_probs)
    total_log_prob = T.log(T.sum(T.exp(log_probs - common_factor)[mask.nonzero()])) + common_factor
    return -total_log_prob


# batchifies original CTC code
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    """Runs CTC loss algorithm on each batch element.

    # Arguments
        y_true: tensor (samples, max_string_length) containing the truth labels
        y_pred: tensor (samples, time_steps, num_categories) containing the
                prediction, or output of the softmax
        input_length: tensor (samples,1) containing the sequence length for
                each batch item in y_pred
        label_length: tensor (samples,1) containing the sequence length for
                each batch item in y_true

    # Returns
        Tensor with shape (samples,1) containing the
            CTC loss of each element

    NOTE(review): the final `dimshuffle('x', 0)` puts the new axis *first*,
    which looks like shape (1, samples) rather than the documented
    (samples, 1) -- confirm which one callers rely on.
    """

    def ctc_step(y_true_step, y_pred_step, input_length_step, label_length_step):
        # Cut predictions and labels down to this sample's true lengths.
        y_pred_step = y_pred_step[0: input_length_step[0]]
        y_true_step = y_true_step[0:label_length_step[0]]
        return ctc_cost(y_pred_step, y_true_step)

    ret, _ = theano.scan(
        fn=ctc_step,
        outputs_info=None,
        sequences=[y_true, y_pred, input_length, label_length]
    )

    ret = ret.dimshuffle('x', 0)
    return ret


# HIGH ORDER FUNCTIONS

def map_fn(fn, elems, name=None, dtype=None):
    """Map the function fn over the elements elems and return the outputs.

    # Arguments
        fn: Callable that will be called upon each element in elems
        elems: tensor, at least 2 dimensional
        name: A string name for the map node in the graph
        dtype: accepted but not used by this implementation.

    # Returns
        Tensor with first dimension equal to the elems and second depending on
        fn
    """
    # theano.map returns a pair; only its first element is returned.
    return theano.map(fn, elems, name=name)[0]
# Arguments fn: Callable that will be called upon each element in elems and an accumulator, for instance lambda acc, x: acc + x elems: tensor initializer: The first value used (elems[0] in case of None) name: A string name for the foldl node in the graph # Returns Same type and shape as initializer """ if initializer is None: initializer = elems[0] elems = elems[1:] # We need to change the order of the arguments because theano accepts x as # first parameter and accumulator as second return theano.foldl(lambda x, acc: fn(acc, x), elems, initializer, name=name)[0] def foldr(fn, elems, initializer=None, name=None): """Reduce elems using fn to combine them from right to left. # Arguments fn: Callable that will be called upon each element in elems and an accumulator, for instance lambda acc, x: acc + x elems: tensor initializer: The first value used (elems[-1] in case of None) name: A string name for the foldr node in the graph # Returns Same type and shape as initializer """ if initializer is None: initializer = elems[-1] elems = elems[:-1] # We need to change the order of the arguments because theano accepts x as # first parameter and accumulator as second return theano.foldr(lambda x, acc: fn(acc, x), elems, initializer, name=name)[0] def local_conv1d(inputs, kernel, kernel_size, strides, data_format=None): data_format = normalize_data_format(data_format) stride = strides[0] kernel_shape = int_shape(kernel) output_length, feature_dim, filters = kernel_shape xs = [] for i in range(output_length): slice_length = py_slice(i * stride, i * stride + kernel_size[0]) xs.append(reshape(inputs[:, slice_length, :], (1, -1, feature_dim))) x_aggregate = concatenate(xs, axis=0) # Shape: `(output_length, batch_size, filters)`. 
output = batch_dot(x_aggregate, kernel) return permute_dimensions(output, (1, 0, 2)) def local_conv2d(inputs, kernel, kernel_size, strides, output_shape, data_format=None): data_format = normalize_data_format(data_format) stride_row, stride_col = strides output_row, output_col = output_shape kernel_shape = int_shape(kernel) _, feature_dim, filters = kernel_shape if data_format == 'channels_first': output = [] for i in range(output_row): for j in range(output_col): slice_row = py_slice(i * stride_row, i * stride_row + kernel_size[0]) slice_col = py_slice(j * stride_col, j * stride_col + kernel_size[1]) x_flatten = reshape(inputs[:, :, slice_row, slice_col], (1, -1, feature_dim)) output.append(dot(x_flatten, kernel[i * output_col + j, :, :])) output = concatenate(output, axis=0) output = reshape(output, (output_row, output_col, -1, filters)) output = permute_dimensions(output, (2, 3, 0, 1)) else: xs = [] for i in range(output_row): for j in range(output_col): slice_row = py_slice(i * stride_row, i * stride_row + kernel_size[0]) slice_col = py_slice(j * stride_col, j * stride_col + kernel_size[1]) xs.append(reshape(inputs[:, slice_row, slice_col, :], (1, -1, feature_dim))) x_aggregate = concatenate(xs, axis=0) output = batch_dot(x_aggregate, kernel) output = reshape(output, (output_row, output_col, -1, filters)) output = permute_dimensions(output, (2, 0, 1, 3)) return output Keras-2.2.4/keras/backend/tensorflow_backend.py0000644000000000116100000042675713354530144021267 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow.python.framework import ops as tf_ops from tensorflow.python.training import moving_averages from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import functional_ops from tensorflow.python.ops import ctc_ops as ctc from tensorflow.python.client import 
device_lib from tensorflow.core.protobuf import config_pb2 from collections import defaultdict import numpy as np from distutils.version import StrictVersion import os from .common import floatx from .common import epsilon from .common import normalize_data_format from ..utils.generic_utils import transpose_shape from ..utils.generic_utils import has_arg # Legacy functions from .common import set_image_dim_ordering from .common import image_dim_ordering py_all = all py_any = any py_sum = sum py_slice = slice # INTERNAL UTILS # This is the default internal TF session used by Keras. # It can be set manually via `set_session(sess)`. _SESSION = None # This dictionary holds a mapping {graph: learning_phase}. # A learning phase is a bool tensor used to run Keras models in # either train mode (learning_phase == 1) or test mode (learning_phase == 0). _GRAPH_LEARNING_PHASES = {} # This dictionary holds a mapping {graph: UID_DICT}. # each UID_DICT is a dictionary mapping name prefixes to a current index, # used for generating graph-specific string UIDs # for various names (e.g. layer names). _GRAPH_UID_DICTS = {} # This boolean flag can be set to True to leave variable initialization # up to the user. # Change its value via `manual_variable_initialization(value)`. _MANUAL_VAR_INIT = False # This list holds the available devices. # It is populated when `_get_available_gpus()` is called for the first time. # We assume our devices don't change during our lifetime. _LOCAL_DEVICES = None def get_uid(prefix=''): """Get the uid for the default graph. # Arguments prefix: An optional prefix of the graph. # Returns A unique identifier for the graph. """ global _GRAPH_UID_DICTS graph = tf.get_default_graph() if graph not in _GRAPH_UID_DICTS: _GRAPH_UID_DICTS[graph] = defaultdict(int) _GRAPH_UID_DICTS[graph][prefix] += 1 return _GRAPH_UID_DICTS[graph][prefix] def reset_uids(): """Resets graph identifiers. 
""" global _GRAPH_UID_DICTS _GRAPH_UID_DICTS = {} def clear_session(): """Destroys the current TF graph and creates a new one. Useful to avoid clutter from old models / layers. """ global _SESSION global _GRAPH_LEARNING_PHASES tf.reset_default_graph() reset_uids() _SESSION = None phase = tf.placeholder_with_default(False, shape=(), name='keras_learning_phase') _GRAPH_LEARNING_PHASES = {} _GRAPH_LEARNING_PHASES[tf.get_default_graph()] = phase def manual_variable_initialization(value): """Sets the manual variable initialization flag. This boolean flag determines whether variables should be initialized as they are instantiated (default), or if the user should handle the initialization (e.g. via `tf.initialize_all_variables()`). # Arguments value: Python boolean. """ global _MANUAL_VAR_INIT _MANUAL_VAR_INIT = value def learning_phase(): """Returns the learning phase flag. The learning phase flag is a bool tensor (0 = test, 1 = train) to be passed as input to any Keras function that uses a different behavior at train time and test time. # Returns Learning phase (scalar integer tensor or Python integer). """ graph = tf.get_default_graph() if graph not in _GRAPH_LEARNING_PHASES: phase = tf.placeholder_with_default(False, shape=(), name='keras_learning_phase') _GRAPH_LEARNING_PHASES[graph] = phase return _GRAPH_LEARNING_PHASES[graph] def set_learning_phase(value): """Sets the learning phase to a fixed value. # Arguments value: Learning phase value, either 0 or 1 (integers). # Raises ValueError: if `value` is neither `0` nor `1`. """ global _GRAPH_LEARNING_PHASES if value not in {0, 1}: raise ValueError('Expected learning phase to be ' '0 or 1.') _GRAPH_LEARNING_PHASES[tf.get_default_graph()] = value def get_session(): """Returns the TF session to be used by the backend. If a default TensorFlow session is available, we will return it. Else, we will return the global Keras session. If no global Keras session exists at this point: we will create a new global session. 
Note that you can manually set the global session via `K.set_session(sess)`. # Returns A TensorFlow session. """ global _SESSION default_session = tf.get_default_session() if default_session is not None: session = default_session else: if _SESSION is None: if not os.environ.get('OMP_NUM_THREADS'): config = tf.ConfigProto(allow_soft_placement=True) else: num_thread = int(os.environ.get('OMP_NUM_THREADS')) config = tf.ConfigProto(intra_op_parallelism_threads=num_thread, allow_soft_placement=True) _SESSION = tf.Session(config=config) session = _SESSION if not _MANUAL_VAR_INIT: with session.graph.as_default(): variables = tf.global_variables() candidate_vars = [] for v in variables: if not getattr(v, '_keras_initialized', False): candidate_vars.append(v) if candidate_vars: # This step is expensive, so we only run it on variables # not already marked as initialized. is_initialized = session.run( [tf.is_variable_initialized(v) for v in candidate_vars]) uninitialized_vars = [] for flag, v in zip(is_initialized, candidate_vars): if not flag: uninitialized_vars.append(v) v._keras_initialized = True if uninitialized_vars: session.run(tf.variables_initializer(uninitialized_vars)) # hack for list_devices() function. # list_devices() function is not available under tensorflow r1.3. if not hasattr(session, 'list_devices'): session.list_devices = lambda: device_lib.list_local_devices() return session def set_session(session): """Sets the global TensorFlow session. # Arguments session: A TF Session. """ global _SESSION _SESSION = session # DEVICE MANIPULATION AND PROBING class _TfDeviceCaptureOp(object): """Class for capturing the TF device scope.""" def __init__(self): self.device = None def _set_device(self, device): """This method captures TF's explicit device scope setting.""" self.device = device def _get_current_tf_device(): """Return explicit device of current context, otherwise returns `None`. 
# Returns If the current device scope is explicitly set, it returns a string with the device (`CPU` or `GPU`). If the scope is not explicitly set, it will return `None`. """ g = tf.get_default_graph() op = _TfDeviceCaptureOp() g._apply_device_functions(op) return op.device def _is_current_explicit_device(device_type): """Check if the current device is explicitly set on the device type specified. # Arguments device_type: A string containing `GPU` or `CPU` (case-insensitive). # Returns A boolean indicating if the current device scope is explicitly set on the device type. # Raises ValueError: If the `device_type` string indicates an unsupported device. """ device_type = device_type.upper() if device_type not in ['CPU', 'GPU']: raise ValueError('`device_type` should be either "CPU" or "GPU".') device = _get_current_tf_device() return (device is not None and device.device_type == device_type.upper()) def _get_available_gpus(): """Get a list of available gpu devices (formatted as strings). # Returns A list of available GPU devices. """ global _LOCAL_DEVICES if _LOCAL_DEVICES is None: _LOCAL_DEVICES = get_session().list_devices() return [x.name for x in _LOCAL_DEVICES if x.device_type == 'GPU'] def _has_nchw_support(): """Check whether the current scope supports NCHW ops. TensorFlow does not support NCHW on CPU. Therefore we check if we are not explicitly put on CPU, and have GPUs available. In this case there will be soft-placing on the GPU device. # Returns bool: if the current scope device placement would support nchw """ explicitly_on_cpu = _is_current_explicit_device('CPU') gpus_available = len(_get_available_gpus()) > 0 return (not explicitly_on_cpu and gpus_available) # VARIABLE MANIPULATION def _to_tensor(x, dtype): """Convert the input `x` to a tensor of type `dtype`. # Arguments x: An object to be converted (numpy array, list, tensors). dtype: The destination type. # Returns A tensor. 
""" return tf.convert_to_tensor(x, dtype=dtype) def is_sparse(tensor): """Returns whether a tensor is a sparse tensor. # Arguments tensor: A tensor instance. # Returns A boolean. # Example ```python >>> from keras import backend as K >>> a = K.placeholder((2, 2), sparse=False) >>> print(K.is_sparse(a)) False >>> b = K.placeholder((2, 2), sparse=True) >>> print(K.is_sparse(b)) True ``` """ return isinstance(tensor, tf.SparseTensor) def to_dense(tensor): """Converts a sparse tensor into a dense tensor and returns it. # Arguments tensor: A tensor instance (potentially sparse). # Returns A dense tensor. # Examples ```python >>> from keras import backend as K >>> b = K.placeholder((2, 2), sparse=True) >>> print(K.is_sparse(b)) True >>> c = K.to_dense(b) >>> print(K.is_sparse(c)) False ``` """ if is_sparse(tensor): return tf.sparse_tensor_to_dense(tensor) else: return tensor name_scope = tf.name_scope def variable(value, dtype=None, name=None, constraint=None): """Instantiates a variable and returns it. # Arguments value: Numpy array, initial value of the tensor. dtype: Tensor type. name: Optional name string for the tensor. constraint: Optional projection function to be applied to the variable after an optimizer update. # Returns A variable instance (with Keras metadata included). 
# Examples ```python >>> from keras import backend as K >>> val = np.array([[1, 2], [3, 4]]) >>> kvar = K.variable(value=val, dtype='float64', name='example_var') >>> K.dtype(kvar) 'float64' >>> print(kvar) example_var >>> K.eval(kvar) array([[ 1., 2.], [ 3., 4.]]) ``` """ if dtype is None: dtype = floatx() if hasattr(value, 'tocoo'): sparse_coo = value.tocoo() indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), np.expand_dims(sparse_coo.col, 1)), 1) v = tf.SparseTensor(indices=indices, values=sparse_coo.data, dense_shape=sparse_coo.shape) v._keras_shape = sparse_coo.shape v._uses_learning_phase = False return v v = tf.Variable(value, dtype=tf.as_dtype(dtype), name=name) if isinstance(value, np.ndarray): v._keras_shape = value.shape elif hasattr(value, 'get_shape'): v._keras_shape = int_shape(value) v._uses_learning_phase = False # TODO: move to Variable constructor when supported in public release. try: v.constraint = constraint except AttributeError: v._constraint = constraint return v def constant(value, dtype=None, shape=None, name=None): """Creates a constant tensor. # Arguments value: A constant value (or list) dtype: The type of the elements of the resulting tensor. shape: Optional dimensions of resulting tensor. name: Optional name for the tensor. # Returns A Constant Tensor. """ if dtype is None: dtype = floatx() return tf.constant(value, dtype=dtype, shape=shape, name=name) def is_keras_tensor(x): """Returns whether `x` is a Keras tensor. A "Keras tensor" is a tensor that was returned by a Keras layer, (`Layer` class) or by `Input`. # Arguments x: A candidate tensor. # Returns A boolean: Whether the argument is a Keras tensor. # Raises ValueError: In case `x` is not a symbolic tensor. # Examples ```python >>> from keras import backend as K >>> from keras.layers import Input, Dense >>> np_var = numpy.array([1, 2]) >>> K.is_keras_tensor(np_var) # A numpy array is not a symbolic tensor. 
ValueError >>> k_var = tf.placeholder('float32', shape=(1,1)) >>> K.is_keras_tensor(k_var) # A variable indirectly created outside of keras is not a Keras tensor. False >>> keras_var = K.variable(np_var) >>> K.is_keras_tensor(keras_var) # A variable created with the keras backend is not a Keras tensor. False >>> keras_placeholder = K.placeholder(shape=(2, 4, 5)) >>> K.is_keras_tensor(keras_placeholder) # A placeholder is not a Keras tensor. False >>> keras_input = Input([10]) >>> K.is_keras_tensor(keras_input) # An Input is a Keras tensor. True >>> keras_layer_output = Dense(10)(keras_input) >>> K.is_keras_tensor(keras_layer_output) # Any Keras layer output is a Keras tensor. True ``` """ if not is_tensor(x): raise ValueError('Unexpectedly found an instance of type `' + str(type(x)) + '`. ' 'Expected a symbolic tensor instance.') return hasattr(x, '_keras_history') def is_tensor(x): return isinstance(x, tf_ops._TensorLike) or tf_ops.is_dense_tensor_like(x) def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None): """Instantiates a placeholder tensor and returns it. # Arguments shape: Shape of the placeholder (integer tuple, may include `None` entries). ndim: Number of axes of the tensor. At least one of {`shape`, `ndim`} must be specified. If both are specified, `shape` is used. dtype: Placeholder type. sparse: Boolean, whether the placeholder should have a sparse type. name: Optional name string for the placeholder. # Returns Tensor instance (with Keras metadata included). 
# Examples ```python >>> from keras import backend as K >>> input_ph = K.placeholder(shape=(2, 4, 5)) >>> input_ph._keras_shape (2, 4, 5) >>> input_ph ``` """ if dtype is None: dtype = floatx() if not shape: if ndim: shape = tuple([None for _ in range(ndim)]) if sparse: x = tf.sparse_placeholder(dtype, shape=shape, name=name) else: x = tf.placeholder(dtype, shape=shape, name=name) x._keras_shape = shape x._uses_learning_phase = False return x def is_placeholder(x): """Returns whether `x` is a placeholder. # Arguments x: A candidate placeholder. # Returns Boolean. """ try: return x.op.type == 'Placeholder' except AttributeError: return False def shape(x): """Returns the symbolic shape of a tensor or variable. # Arguments x: A tensor or variable. # Returns A symbolic shape (which is itself a tensor). # Examples ```python # TensorFlow example >>> from keras import backend as K >>> tf_session = K.get_session() >>> val = np.array([[1, 2], [3, 4]]) >>> kvar = K.variable(value=val) >>> inputs = keras.backend.placeholder(shape=(2, 4, 5)) >>> K.shape(kvar) >>> K.shape(inputs) # To get integer shape (Instead, you can use K.int_shape(x)) >>> K.shape(kvar).eval(session=tf_session) array([2, 2], dtype=int32) >>> K.shape(inputs).eval(session=tf_session) array([2, 4, 5], dtype=int32) ``` """ return tf.shape(x) def int_shape(x): """Returns the shape of tensor or variable as a tuple of int or None entries. # Arguments x: Tensor or variable. # Returns A tuple of integers (or None entries). # Examples ```python >>> from keras import backend as K >>> inputs = K.placeholder(shape=(2, 4, 5)) >>> K.int_shape(inputs) (2, 4, 5) >>> val = np.array([[1, 2], [3, 4]]) >>> kvar = K.variable(value=val) >>> K.int_shape(kvar) (2, 2) ``` """ if hasattr(x, '_keras_shape'): return x._keras_shape try: return tuple(x.get_shape().as_list()) except ValueError: return None def ndim(x): """Returns the number of axes in a tensor, as an integer. # Arguments x: Tensor or variable. 
# Returns Integer (scalar), number of axes. # Examples ```python >>> from keras import backend as K >>> inputs = K.placeholder(shape=(2, 4, 5)) >>> val = np.array([[1, 2], [3, 4]]) >>> kvar = K.variable(value=val) >>> K.ndim(inputs) 3 >>> K.ndim(kvar) 2 ``` """ dims = x.get_shape()._dims if dims is not None: return len(dims) return None def dtype(x): """Returns the dtype of a Keras tensor or variable, as a string. # Arguments x: Tensor or variable. # Returns String, dtype of `x`. # Examples ```python >>> from keras import backend as K >>> K.dtype(K.placeholder(shape=(2,4,5))) 'float32' >>> K.dtype(K.placeholder(shape=(2,4,5), dtype='float32')) 'float32' >>> K.dtype(K.placeholder(shape=(2,4,5), dtype='float64')) 'float64' # Keras variable >>> kvar = K.variable(np.array([[1, 2], [3, 4]])) >>> K.dtype(kvar) 'float32_ref' >>> kvar = K.variable(np.array([[1, 2], [3, 4]]), dtype='float32') >>> K.dtype(kvar) 'float32_ref' ``` """ return x.dtype.base_dtype.name def eval(x): """Evaluates the value of a variable. # Arguments x: A variable. # Returns A Numpy array. # Examples ```python >>> from keras import backend as K >>> kvar = K.variable(np.array([[1, 2], [3, 4]]), dtype='float32') >>> K.eval(kvar) array([[ 1., 2.], [ 3., 4.]], dtype=float32) ``` """ return to_dense(x).eval(session=get_session()) def zeros(shape, dtype=None, name=None): """Instantiates an all-zeros variable and returns it. # Arguments shape: Tuple of integers, shape of returned Keras variable dtype: String, data type of returned Keras variable name: String, name of returned Keras variable # Returns A variable (including Keras metadata), filled with `0.0`. Note that if `shape` was symbolic, we cannot return a variable, and will return a dynamically-shaped tensor instead. 
# Example ```python >>> from keras import backend as K >>> kvar = K.zeros((3,4)) >>> K.eval(kvar) array([[ 0., 0., 0., 0.], [ 0., 0., 0., 0.], [ 0., 0., 0., 0.]], dtype=float32) ``` """ if dtype is None: dtype = floatx() tf_dtype = tf.as_dtype(dtype) v = tf.zeros(shape=shape, dtype=tf_dtype, name=name) if py_all(v.get_shape().as_list()): return variable(v, dtype=dtype, name=name) return v def ones(shape, dtype=None, name=None): """Instantiates an all-ones variable and returns it. # Arguments shape: Tuple of integers, shape of returned Keras variable. dtype: String, data type of returned Keras variable. name: String, name of returned Keras variable. # Returns A Keras variable, filled with `1.0`. Note that if `shape` was symbolic, we cannot return a variable, and will return a dynamically-shaped tensor instead. # Example ```python >>> from keras import backend as K >>> kvar = K.ones((3,4)) >>> K.eval(kvar) array([[ 1., 1., 1., 1.], [ 1., 1., 1., 1.], [ 1., 1., 1., 1.]], dtype=float32) ``` """ if dtype is None: dtype = floatx() tf_dtype = tf.as_dtype(dtype) v = tf.ones(shape=shape, dtype=tf_dtype, name=name) if py_all(v.get_shape().as_list()): return variable(v, dtype=dtype, name=name) return v def eye(size, dtype=None, name=None): """Instantiate an identity matrix and returns it. # Arguments size: Integer, number of rows/columns. dtype: String, data type of returned Keras variable. name: String, name of returned Keras variable. # Returns A Keras variable, an identity matrix. # Example ```python >>> from keras import backend as K >>> kvar = K.eye(3) >>> K.eval(kvar) array([[ 1., 0., 0.], [ 0., 1., 0.], [ 0., 0., 1.]], dtype=float32) ``` """ if dtype is None: dtype = floatx() tf_dtype = tf.as_dtype(dtype) return variable(tf.eye(size, dtype=tf_dtype), dtype, name) def zeros_like(x, dtype=None, name=None): """Instantiates an all-zeros variable of the same shape as another tensor. # Arguments x: Keras variable or Keras tensor. 
dtype: String, dtype of returned Keras variable. None uses the dtype of x. name: String, name for the variable to create. # Returns A Keras variable with the shape of x filled with zeros. # Example ```python >>> from keras import backend as K >>> kvar = K.variable(np.random.random((2,3))) >>> kvar_zeros = K.zeros_like(kvar) >>> K.eval(kvar_zeros) array([[ 0., 0., 0.], [ 0., 0., 0.]], dtype=float32) ``` """ return tf.zeros_like(x, dtype=dtype, name=name) def ones_like(x, dtype=None, name=None): """Instantiates an all-ones variable of the same shape as another tensor. # Arguments x: Keras variable or tensor. dtype: String, dtype of returned Keras variable. None uses the dtype of x. name: String, name for the variable to create. # Returns A Keras variable with the shape of x filled with ones. # Example ```python >>> from keras import backend as K >>> kvar = K.variable(np.random.random((2,3))) >>> kvar_ones = K.ones_like(kvar) >>> K.eval(kvar_ones) array([[ 1., 1., 1.], [ 1., 1., 1.]], dtype=float32) ``` """ return tf.ones_like(x, dtype=dtype, name=name) def identity(x, name=None): """Returns a tensor with the same content as the input tensor. # Arguments x: The input tensor. name: String, name for the variable to create. # Returns A tensor of the same shape, type and content. """ return tf.identity(x, name) def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None): """Instantiates a variable with values drawn from a uniform distribution. # Arguments shape: Tuple of integers, shape of returned Keras variable. low: Float, lower boundary of the output interval. high: Float, upper boundary of the output interval. dtype: String, dtype of returned Keras variable. name: String, name of returned Keras variable. seed: Integer, random seed. # Returns A Keras variable, filled with drawn samples. 
# Example ```python # TensorFlow example >>> kvar = K.random_uniform_variable((2,3), 0, 1) >>> kvar >>> K.eval(kvar) array([[ 0.10940075, 0.10047495, 0.476143 ], [ 0.66137183, 0.00869417, 0.89220798]], dtype=float32) ``` """ if dtype is None: dtype = floatx() tf_dtype = tf.as_dtype(dtype) if seed is None: # ensure that randomness is conditioned by the Numpy RNG seed = np.random.randint(10e8) value = tf.random_uniform_initializer( low, high, dtype=tf_dtype, seed=seed)(shape) return variable(value, dtype=dtype, name=name) def random_normal_variable(shape, mean, scale, dtype=None, name=None, seed=None): """Instantiates a variable with values drawn from a normal distribution. # Arguments shape: Tuple of integers, shape of returned Keras variable. mean: Float, mean of the normal distribution. scale: Float, standard deviation of the normal distribution. dtype: String, dtype of returned Keras variable. name: String, name of returned Keras variable. seed: Integer, random seed. # Returns A Keras variable, filled with drawn samples. # Example ```python # TensorFlow example >>> kvar = K.random_normal_variable((2,3), 0, 1) >>> kvar >>> K.eval(kvar) array([[ 1.19591331, 0.68685907, -0.63814116], [ 0.92629528, 0.28055015, 1.70484698]], dtype=float32) ``` """ if dtype is None: dtype = floatx() tf_dtype = tf.as_dtype(dtype) if seed is None: # ensure that randomness is conditioned by the Numpy RNG seed = np.random.randint(10e8) value = tf.random_normal_initializer( mean, scale, dtype=tf_dtype, seed=seed)(shape) return variable(value, dtype=dtype, name=name) def count_params(x): """Returns the static number of elements in a Keras variable or tensor. # Arguments x: Keras variable or tensor. # Returns Integer, the number of elements in `x`, i.e., the product of the array's static dimensions. 
# Example ```python >>> kvar = K.zeros((2,3)) >>> K.count_params(kvar) 6 >>> K.eval(kvar) array([[ 0., 0., 0.], [ 0., 0., 0.]], dtype=float32) ``` """ return np.prod(int_shape(x)) def cast(x, dtype): """Casts a tensor to a different dtype and returns it. You can cast a Keras variable but it still returns a Keras tensor. # Arguments x: Keras tensor (or variable). dtype: String, either (`'float16'`, `'float32'`, or `'float64'`). # Returns Keras tensor with dtype `dtype`. # Example ```python >>> from keras import backend as K >>> input = K.placeholder((2, 3), dtype='float32') >>> input # It doesn't work in-place as below. >>> K.cast(input, dtype='float16') >>> input # you need to assign it. >>> input = K.cast(input, dtype='float16') >>> input ``` """ return tf.cast(x, dtype) # UPDATES OPS def update(x, new_x): """Update the value of `x` to `new_x`. # Arguments x: A `Variable`. new_x: A tensor of same shape as `x`. # Returns The variable `x` updated. """ return tf.assign(x, new_x) def update_add(x, increment): """Update the value of `x` by adding `increment`. # Arguments x: A `Variable`. increment: A tensor of same shape as `x`. # Returns The variable `x` updated. """ return tf.assign_add(x, increment) def update_sub(x, decrement): """Update the value of `x` by subtracting `decrement`. # Arguments x: A `Variable`. decrement: A tensor of same shape as `x`. # Returns The variable `x` updated. """ return tf.assign_sub(x, decrement) def moving_average_update(x, value, momentum): """Compute the moving average of a variable. # Arguments x: A `Variable`. value: A tensor with the same shape as `x`. momentum: The moving average momentum. # Returns An operation to update the variable. """ return moving_averages.assign_moving_average( x, value, momentum, zero_debias=True) # LINEAR ALGEBRA def dot(x, y): """Multiplies 2 tensors (and/or variables) and returns a *tensor*. When attempting to multiply a nD tensor with a nD tensor, it reproduces the Theano behavior. (e.g. 
`(2, 3) * (4, 3, 5) -> (2, 4, 5)`) # Arguments x: Tensor or variable. y: Tensor or variable. # Returns A tensor, dot product of `x` and `y`. # Examples ```python # dot product between tensors >>> x = K.placeholder(shape=(2, 3)) >>> y = K.placeholder(shape=(3, 4)) >>> xy = K.dot(x, y) >>> xy ``` ```python # dot product between tensors >>> x = K.placeholder(shape=(32, 28, 3)) >>> y = K.placeholder(shape=(3, 4)) >>> xy = K.dot(x, y) >>> xy ``` ```python # Theano-like behavior example >>> x = K.random_uniform_variable(shape=(2, 3), low=0, high=1) >>> y = K.ones((4, 3, 5)) >>> xy = K.dot(x, y) >>> K.int_shape(xy) (2, 4, 5) ``` """ if ndim(x) is not None and (ndim(x) > 2 or ndim(y) > 2): x_shape = [] for i, s in zip(int_shape(x), tf.unstack(tf.shape(x))): if i is not None: x_shape.append(i) else: x_shape.append(s) x_shape = tuple(x_shape) y_shape = [] for i, s in zip(int_shape(y), tf.unstack(tf.shape(y))): if i is not None: y_shape.append(i) else: y_shape.append(s) y_shape = tuple(y_shape) y_permute_dim = list(range(ndim(y))) y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim xt = tf.reshape(x, [-1, x_shape[-1]]) yt = tf.reshape(tf.transpose(y, perm=y_permute_dim), [y_shape[-2], -1]) return tf.reshape(tf.matmul(xt, yt), x_shape[:-1] + y_shape[:-2] + y_shape[-1:]) if is_sparse(x): out = tf.sparse_tensor_dense_matmul(x, y) else: out = tf.matmul(x, y) return out def batch_dot(x, y, axes=None): """Batchwise dot product. `batch_dot` is used to compute dot product of `x` and `y` when `x` and `y` are data in batch, i.e. in a shape of `(batch_size, :)`. `batch_dot` results in a tensor or variable with less dimensions than the input. If the number of dimensions is reduced to 1, we use `expand_dims` to make sure that ndim is at least 2. # Arguments x: Keras tensor or variable with `ndim >= 2`. y: Keras tensor or variable with `ndim >= 2`. axes: list of (or single) int with target dimensions. The lengths of `axes[0]` and `axes[1]` should be the same. 
# Returns A tensor with shape equal to the concatenation of `x`'s shape (less the dimension that was summed over) and `y`'s shape (less the batch dimension and the dimension that was summed over). If the final rank is 1, we reshape it to `(batch_size, 1)`. # Examples Assume `x = [[1, 2], [3, 4]]` and `y = [[5, 6], [7, 8]]` `batch_dot(x, y, axes=1) = [[17], [53]]` which is the main diagonal of `x.dot(y.T)`, although we never have to calculate the off-diagonal elements. Shape inference: Let `x`'s shape be `(100, 20)` and `y`'s shape be `(100, 30, 20)`. If `axes` is (1, 2), to find the output shape of resultant tensor, loop through each dimension in `x`'s shape and `y`'s shape: * `x.shape[0]` : 100 : append to output shape * `x.shape[1]` : 20 : do not append to output shape, dimension 1 of `x` has been summed over. (`dot_axes[0]` = 1) * `y.shape[0]` : 100 : do not append to output shape, always ignore first dimension of `y` * `y.shape[1]` : 30 : append to output shape * `y.shape[2]` : 20 : do not append to output shape, dimension 2 of `y` has been summed over. (`dot_axes[1]` = 2) `output_shape` = `(100, 30)` ```python >>> x_batch = K.ones(shape=(32, 20, 1)) >>> y_batch = K.ones(shape=(32, 30, 20)) >>> xy_batch_dot = K.batch_dot(x_batch, y_batch, axes=[1, 2]) >>> K.int_shape(xy_batch_dot) (32, 1, 30) ``` """ if isinstance(axes, int): axes = (axes, axes) x_ndim = ndim(x) y_ndim = ndim(y) if axes is None: # behaves like tf.batch_matmul as default axes = [x_ndim - 1, y_ndim - 2] if py_any([isinstance(a, (list, tuple)) for a in axes]): raise ValueError('Multiple target dimensions are not supported. 
' + 'Expected: None, int, (int, int), ' + 'Provided: ' + str(axes)) if x_ndim > y_ndim: diff = x_ndim - y_ndim y = tf.reshape(y, tf.concat([tf.shape(y), [1] * (diff)], axis=0)) elif y_ndim > x_ndim: diff = y_ndim - x_ndim x = tf.reshape(x, tf.concat([tf.shape(x), [1] * (diff)], axis=0)) else: diff = 0 if ndim(x) == 2 and ndim(y) == 2: if axes[0] == axes[1]: out = tf.reduce_sum(tf.multiply(x, y), axes[0]) else: out = tf.reduce_sum(tf.multiply(tf.transpose(x, [1, 0]), y), axes[1]) else: if axes is not None: adj_x = None if axes[0] == ndim(x) - 1 else True adj_y = True if axes[1] == ndim(y) - 1 else None else: adj_x = None adj_y = None out = tf.matmul(x, y, adjoint_a=adj_x, adjoint_b=adj_y) if diff: if x_ndim > y_ndim: idx = x_ndim + y_ndim - 3 else: idx = x_ndim - 1 out = tf.squeeze(out, list(range(idx, idx + diff))) if ndim(out) == 1: out = expand_dims(out, 1) return out def transpose(x): """Transposes a tensor and returns it. # Arguments x: Tensor or variable. # Returns A tensor. # Examples ```python >>> var = K.variable([[1, 2, 3], [4, 5, 6]]) >>> K.eval(var) array([[ 1., 2., 3.], [ 4., 5., 6.]], dtype=float32) >>> var_transposed = K.transpose(var) >>> K.eval(var_transposed) array([[ 1., 4.], [ 2., 5.], [ 3., 6.]], dtype=float32) ``` ```python >>> inputs = K.placeholder((2, 3)) >>> inputs >>> input_transposed = K.transpose(inputs) >>> input_transposed ``` """ return tf.transpose(x) def gather(reference, indices): """Retrieves the elements of indices `indices` in the tensor `reference`. # Arguments reference: A tensor. indices: An integer tensor of indices. # Returns A tensor of same type as `reference`. """ return tf.nn.embedding_lookup(reference, indices) # ELEMENT-WISE OPERATIONS def max(x, axis=None, keepdims=False): """Maximum value in a tensor. # Arguments x: A tensor or variable. axis: An integer or list of integers in [-rank(x), rank(x)), the axes to find maximum values. If `None` (default), finds the maximum over all dimensions. 
keepdims: A boolean, whether to keep the dimensions or not.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1. If `keepdims` is `True`,
            the reduced dimension is retained with length 1.

    # Returns
        A tensor with maximum values of `x`.
    """
    return tf.reduce_max(x, axis, keepdims)


def min(x, axis=None, keepdims=False):
    """Minimum value in a tensor.

    # Arguments
        x: A tensor or variable.
        axis: An integer or list of integers in [-rank(x), rank(x)),
            the axes to find minimum values. If `None` (default), finds the
            minimum over all dimensions.
        keepdims: A boolean, whether to keep the dimensions or not.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1. If `keepdims` is `True`,
            the reduced dimension is retained with length 1.

    # Returns
        A tensor with minimum values of `x`.
    """
    return tf.reduce_min(x, axis, keepdims)


def sum(x, axis=None, keepdims=False):
    """Sum of the values in a tensor, alongside the specified axis.

    # Arguments
        x: A tensor or variable.
        axis: An integer or list of integers in [-rank(x), rank(x)),
            the axes to sum over. If `None` (default), sums over all
            dimensions.
        keepdims: A boolean, whether to keep the dimensions or not.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1. If `keepdims` is `True`,
            the reduced dimension is retained with length 1.

    # Returns
        A tensor with sum of `x`.
    """
    return tf.reduce_sum(x, axis, keepdims)


def prod(x, axis=None, keepdims=False):
    """Multiplies the values in a tensor, alongside the specified axis.

    # Arguments
        x: A tensor or variable.
        axis: An integer or list of integers in [-rank(x), rank(x)),
            the axes to compute the product. If `None` (default), computes
            the product over all dimensions.
        keepdims: A boolean, whether to keep the dimensions or not.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1. If `keepdims` is `True`,
            the reduced dimension is retained with length 1.

    # Returns
        A tensor with the product of elements of `x`.
    """
    return tf.reduce_prod(x, axis, keepdims)


def cumsum(x, axis=0):
    """Cumulative sum of the values in a tensor, alongside the specified axis.

    # Arguments
        x: A tensor or variable.
        axis: An integer, the axis to compute the sum.

    # Returns
        A tensor of the cumulative sum of values of `x` along `axis`.
    """
    return tf.cumsum(x, axis=axis)


def cumprod(x, axis=0):
    """Cumulative product of the values in a tensor, alongside the specified axis.

    # Arguments
        x: A tensor or variable.
        axis: An integer, the axis to compute the product.

    # Returns
        A tensor of the cumulative product of values of `x` along `axis`.
    """
    return tf.cumprod(x, axis=axis)


def var(x, axis=None, keepdims=False):
    """Variance of a tensor, alongside the specified axis.

    Boolean inputs are cast to `floatx()` first. The variance is computed
    as the mean of the squared deviations from the mean.

    # Arguments
        x: A tensor or variable.
        axis: An integer or list of integers in [-rank(x), rank(x)),
            the axes to compute the variance. If `None` (default), computes
            the variance over all dimensions.
        keepdims: A boolean, whether to keep the dimensions or not.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1. If `keepdims` is `True`,
            the reduced dimension is retained with length 1.

    # Returns
        A tensor with the variance of elements of `x`.
    """
    if x.dtype.base_dtype == tf.bool:
        x = tf.cast(x, floatx())
    # The mean is always taken with the reduced axes kept, so that it can be
    # subtracted from `x`; the caller's `keepdims` only applies to the result.
    m = tf.reduce_mean(x, axis, True)
    devs_squared = tf.square(x - m)
    return tf.reduce_mean(devs_squared, axis, keepdims)


def std(x, axis=None, keepdims=False):
    """Standard deviation of a tensor, alongside the specified axis.

    # Arguments
        x: A tensor or variable.
        axis: An integer or list of integers in [-rank(x), rank(x)),
            the axes to compute the standard deviation. If `None` (default),
            computes the standard deviation over all dimensions.
        keepdims: A boolean, whether to keep the dimensions or not.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1. If `keepdims` is `True`,
            the reduced dimension is retained with length 1.

    # Returns
        A tensor with the standard deviation of elements of `x`.
    """
    return tf.sqrt(var(x, axis=axis, keepdims=keepdims))


def mean(x, axis=None, keepdims=False):
    """Mean of a tensor, alongside the specified axis.

    Boolean inputs are cast to `floatx()` before the reduction.

    # Arguments
        x: A tensor or variable.
        axis: An integer or list of integers in [-rank(x), rank(x)),
            the axes to compute the mean. If `None` (default), computes
            the mean over all dimensions.
        keepdims: A boolean, whether to keep the dimensions or not.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1 for each entry in `axis`. If `keepdims` is `True`,
            the reduced dimensions are retained with length 1.

    # Returns
        A tensor with the mean of elements of `x`.
    """
    if x.dtype.base_dtype == tf.bool:
        x = tf.cast(x, floatx())
    return tf.reduce_mean(x, axis, keepdims)


def any(x, axis=None, keepdims=False):
    """Bitwise reduction (logical OR).

    The input is cast to `tf.bool` before being reduced with
    `tf.reduce_any`.

    # Arguments
        x: Tensor or variable.
        axis: An integer or list of integers in [-rank(x), rank(x)),
            the axes to compute the logical or. If `None` (default), computes
            the logical or over all dimensions.
        keepdims: whether to drop or broadcast the reduction axes.

    # Returns
        A bool tensor.
    """
    x = tf.cast(x, tf.bool)
    return tf.reduce_any(x, axis, keepdims)


def all(x, axis=None, keepdims=False):
    """Bitwise reduction (logical AND).

    The input is cast to `tf.bool` before being reduced with
    `tf.reduce_all`.

    # Arguments
        x: Tensor or variable.
        axis: An integer or list of integers in [-rank(x), rank(x)),
            the axes to compute the logical and. If `None` (default), computes
            the logical and over all dimensions.
        keepdims: whether to drop or broadcast the reduction axes.

    # Returns
        A bool tensor.
    """
    x = tf.cast(x, tf.bool)
    return tf.reduce_all(x, axis, keepdims)


def argmax(x, axis=-1):
    """Returns the index of the maximum value along an axis.

    # Arguments
        x: Tensor or variable.
        axis: axis along which to perform the reduction.

    # Returns
        A tensor.
    """
    return tf.argmax(x, axis)


def argmin(x, axis=-1):
    """Returns the index of the minimum value along an axis.

    # Arguments
        x: Tensor or variable.
        axis: axis along which to perform the reduction.

    # Returns
        A tensor.
    """
    return tf.argmin(x, axis)


def square(x):
    """Element-wise square.

    # Arguments
        x: Tensor or variable.

    # Returns
        A tensor.
    """
    return tf.square(x)


def abs(x):
    """Element-wise absolute value.

    # Arguments
        x: Tensor or variable.

    # Returns
        A tensor.
    """
    return tf.abs(x)


def sqrt(x):
    """Element-wise square root.

    The input is clipped to `[0, inf]` before the root is taken.

    # Arguments
        x: Tensor or variable.

    # Returns
        A tensor.
    """
    zero = _to_tensor(0., x.dtype.base_dtype)
    inf = _to_tensor(np.inf, x.dtype.base_dtype)
    # Negative entries are raised to 0 so that `tf.sqrt` never sees them.
    x = tf.clip_by_value(x, zero, inf)
    return tf.sqrt(x)


def exp(x):
    """Element-wise exponential.

    # Arguments
        x: Tensor or variable.

    # Returns
        A tensor.
    """
    return tf.exp(x)


def log(x):
    """Element-wise log.

    # Arguments
        x: Tensor or variable.

    # Returns
        A tensor.
    """
    return tf.log(x)


def logsumexp(x, axis=None, keepdims=False):
    """Computes log(sum(exp(elements across dimensions of a tensor))).

    This function is more numerically stable than log(sum(exp(x))).
    It avoids overflows caused by taking the exp of large inputs and
    underflows caused by taking the log of small inputs.

    # Arguments
        x: A tensor or variable.
        axis: An integer or list of integers in [-rank(x), rank(x)),
            the axes to compute the logsumexp. If `None` (default), computes
            the logsumexp over all dimensions.
        keepdims: A boolean, whether to keep the dimensions or not.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1. If `keepdims` is `True`, the reduced dimension is
            retained with length 1.

    # Returns
        The reduced tensor.
    """
    return tf.reduce_logsumexp(x, axis, keepdims)


def round(x):
    """Element-wise rounding to the closest integer.

    In case of tie, the rounding mode used is "half to even".

    # Arguments
        x: Tensor or variable.

    # Returns
        A tensor.
    """
    return tf.round(x)


def sign(x):
    """Element-wise sign.

    # Arguments
        x: Tensor or variable.

    # Returns
        A tensor.
    """
    return tf.sign(x)


def pow(x, a):
    """Element-wise exponentiation.

    # Arguments
        x: Tensor or variable.
        a: Python integer.

    # Returns
        A tensor.
    """
    return tf.pow(x, a)


def clip(x, min_value, max_value):
    """Element-wise value clipping.

    # Arguments
        x: Tensor or variable.
        min_value: Python float or integer.
        max_value: Python float or integer, or `None` for no upper bound.

    # Returns
        A tensor.
    """
    # An upper bound below the lower bound is raised to the lower bound.
    if max_value is not None and max_value < min_value:
        max_value = min_value
    # No upper bound: clip from below only.
    if max_value is None:
        max_value = np.inf
    min_value = _to_tensor(min_value, x.dtype.base_dtype)
    max_value = _to_tensor(max_value, x.dtype.base_dtype)
    return tf.clip_by_value(x, min_value, max_value)


def equal(x, y):
    """Element-wise equality between two tensors.

    # Arguments
        x: Tensor or variable.
        y: Tensor or variable.

    # Returns
        A bool tensor.
    """
    return tf.equal(x, y)


def not_equal(x, y):
    """Element-wise inequality between two tensors.

    # Arguments
        x: Tensor or variable.
        y: Tensor or variable.

    # Returns
        A bool tensor.
    """
    return tf.not_equal(x, y)


def greater(x, y):
    """Element-wise truth value of (x > y).

    # Arguments
        x: Tensor or variable.
        y: Tensor or variable.

    # Returns
        A bool tensor.
    """
    return tf.greater(x, y)


def greater_equal(x, y):
    """Element-wise truth value of (x >= y).

    # Arguments
        x: Tensor or variable.
        y: Tensor or variable.

    # Returns
        A bool tensor.
    """
    return tf.greater_equal(x, y)


def less(x, y):
    """Element-wise truth value of (x < y).

    # Arguments
        x: Tensor or variable.
        y: Tensor or variable.

    # Returns
        A bool tensor.
    """
    return tf.less(x, y)


def less_equal(x, y):
    """Element-wise truth value of (x <= y).

    # Arguments
        x: Tensor or variable.
        y: Tensor or variable.

    # Returns
        A bool tensor.
    """
    return tf.less_equal(x, y)


def maximum(x, y):
    """Element-wise maximum of two tensors.

    # Arguments
        x: Tensor or variable.
        y: Tensor or variable.

    # Returns
        A tensor.
    """
    return tf.maximum(x, y)


def minimum(x, y):
    """Element-wise minimum of two tensors.

    # Arguments
        x: Tensor or variable.
        y: Tensor or variable.

    # Returns
        A tensor.
    """
    return tf.minimum(x, y)


def sin(x):
    """Computes sin of x element-wise.

    # Arguments
        x: Tensor or variable.

    # Returns
        A tensor.
    """
    return tf.sin(x)


def cos(x):
    """Computes cos of x element-wise.

    # Arguments
        x: Tensor or variable.

    # Returns
        A tensor.
    """
    return tf.cos(x)


def _regular_normalize_batch_in_training(x, gamma, beta,
                                         reduction_axes, epsilon=1e-3):
    """Non-fused version of `normalize_batch_in_training`.

    # Arguments
        x: Input tensor or variable.
        gamma: Tensor by which to scale the input.
        beta: Tensor with which to center the input.
        reduction_axes: iterable of integers,
            axes over which to normalize.
        epsilon: Fuzz factor.

    # Returns
        A tuple length of 3, `(normalized_tensor, mean, variance)`.
    """
    mean, var = tf.nn.moments(x, reduction_axes,
                              None, None, False)
    normed = tf.nn.batch_normalization(x, mean, var,
                                       beta, gamma,
                                       epsilon)
    return normed, mean, var


def _broadcast_normalize_batch_in_training(x, gamma, beta,
                                           reduction_axes, epsilon=1e-3):
    """Non-fused, broadcast version of `normalize_batch_in_training`.

    # Arguments
        x: Input tensor or variable.
        gamma: Tensor by which to scale the input.
        beta: Tensor with which to center the input.
        reduction_axes: iterable of integers,
            axes over which to normalize.
        epsilon: Fuzz factor.

    # Returns
        A tuple length of 3, `(normalized_tensor, mean, variance)`.
        The returned mean and variance are the un-reshaped moments.
    """
    mean, var = tf.nn.moments(x, reduction_axes,
                              None, None, False)
    # Build a shape with the rank of `x`: size 1 on every reduced axis and
    # the dynamic size of `x` on every other axis.
    target_shape = []
    for axis in range(ndim(x)):
        if axis in reduction_axes:
            target_shape.append(1)
        else:
            target_shape.append(tf.shape(x)[axis])
    target_shape = tf.stack(target_shape)

    # Reshape the statistics (and gamma/beta when given) to that shape
    # before handing them to `tf.nn.batch_normalization`.
    broadcast_mean = tf.reshape(mean, target_shape)
    broadcast_var = tf.reshape(var, target_shape)
    if gamma is None:
        broadcast_gamma = None
    else:
        broadcast_gamma = tf.reshape(gamma, target_shape)
    if beta is None:
        broadcast_beta = None
    else:
        broadcast_beta = tf.reshape(beta, target_shape)

    normed = tf.nn.batch_normalization(
        x,
        broadcast_mean,
        broadcast_var,
        broadcast_beta,
        broadcast_gamma,
        epsilon)
    return normed, mean, var


def _fused_normalize_batch_in_training(x, gamma, beta, reduction_axes,
                                       epsilon=1e-3):
    """Fused version of `normalize_batch_in_training`.
# Arguments x: Input tensor or variable. gamma: Tensor by which to scale the input. beta: Tensor with which to center the input. reduction_axes: iterable of integers, axes over which to normalize. epsilon: Fuzz factor. # Returns A tuple length of 3, `(normalized_tensor, mean, variance)`. """ if list(reduction_axes) == [0, 1, 2]: normalization_axis = 3 tf_data_format = 'NHWC' else: normalization_axis = 1 tf_data_format = 'NCHW' if gamma is None: gamma = tf.constant(1.0, dtype=x.dtype, shape=[x.get_shape()[normalization_axis]]) if beta is None: beta = tf.constant(0.0, dtype=x.dtype, shape=[x.get_shape()[normalization_axis]]) return tf.nn.fused_batch_norm( x, gamma, beta, epsilon=epsilon, data_format=tf_data_format) def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3): """Computes mean and std for batch then apply batch_normalization on batch. # Arguments x: Input tensor or variable. gamma: Tensor by which to scale the input. beta: Tensor with which to center the input. reduction_axes: iterable of integers, axes over which to normalize. epsilon: Fuzz factor. # Returns A tuple length of 3, `(normalized_tensor, mean, variance)`. """ if ndim(x) == 4 and list(reduction_axes) in [[0, 1, 2], [0, 2, 3]]: if not _has_nchw_support() and list(reduction_axes) == [0, 2, 3]: return _broadcast_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=epsilon) return _fused_normalize_batch_in_training( x, gamma, beta, reduction_axes, epsilon=epsilon) else: if sorted(reduction_axes) == list(range(ndim(x)))[:-1]: return _regular_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=epsilon) else: return _broadcast_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=epsilon) def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3): """Applies batch normalization on x given mean, var, beta and gamma. I.e. 
returns: `output = (x - mean) / sqrt(var + epsilon) * gamma + beta` # Arguments x: Input tensor or variable. mean: Mean of batch. var: Variance of batch. beta: Tensor with which to center the input. gamma: Tensor by which to scale the input. axis: Integer, the axis that should be normalized. (typically the features axis). epsilon: Fuzz factor. # Returns A tensor. """ if ndim(x) == 4: # The CPU implementation of FusedBatchNorm only support NHWC if axis == 1 or axis == -3: tf_data_format = 'NCHW' elif axis == 3 or axis == -1: tf_data_format = 'NHWC' else: tf_data_format = None if tf_data_format == 'NHWC' or tf_data_format == 'NCHW' and _has_nchw_support(): # The mean / var / beta / gamma may be processed by broadcast # so it may have extra axes with 1, it is not needed and should be removed if ndim(mean) > 1: mean = tf.reshape(mean, (-1)) if ndim(var) > 1: var = tf.reshape(var, (-1)) if beta is None: beta = zeros_like(mean) elif ndim(beta) > 1: beta = tf.reshape(beta, (-1)) if gamma is None: gamma = ones_like(mean) elif ndim(gamma) > 1: gamma = tf.reshape(gamma, (-1)) y, _, _ = tf.nn.fused_batch_norm( x, gamma, beta, epsilon=epsilon, mean=mean, variance=var, data_format=tf_data_format, is_training=False ) return y # default return tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon) # SHAPE OPERATIONS def concatenate(tensors, axis=-1): """Concatenates a list of tensors alongside the specified axis. # Arguments tensors: list of tensors to concatenate. axis: concatenation axis. # Returns A tensor. """ if axis < 0: rank = ndim(tensors[0]) if rank: axis %= rank else: axis = 0 if py_all([is_sparse(x) for x in tensors]): return tf.sparse_concat(axis, tensors) else: return tf.concat([to_dense(x) for x in tensors], axis) def reshape(x, shape): """Reshapes a tensor to the specified shape. # Arguments x: Tensor or variable. shape: Target shape tuple. # Returns A tensor. """ return tf.reshape(x, shape) def permute_dimensions(x, pattern): """Permutes axes in a tensor. 
# Arguments x: Tensor or variable. pattern: A tuple of dimension indices, e.g. `(0, 2, 1)`. # Returns A tensor. """ return tf.transpose(x, perm=pattern) def resize_images(x, height_factor, width_factor, data_format, interpolation='nearest'): """Resizes the images contained in a 4D tensor. # Arguments x: Tensor or variable to resize. height_factor: Positive integer. width_factor: Positive integer. data_format: string, `"channels_last"` or `"channels_first"`. interpolation: A string, one of `nearest` or `bilinear`. # Returns A tensor. # Raises ValueError: if `data_format` is neither `"channels_last"` or `"channels_first"`. """ if data_format == 'channels_first': rows, cols = 2, 3 else: rows, cols = 1, 2 original_shape = int_shape(x) new_shape = tf.shape(x)[rows:cols + 1] new_shape *= tf.constant(np.array([height_factor, width_factor], dtype='int32')) if data_format == 'channels_first': x = permute_dimensions(x, [0, 2, 3, 1]) if interpolation == 'nearest': x = tf.image.resize_nearest_neighbor(x, new_shape) elif interpolation == 'bilinear': x = tf.image.resize_bilinear(x, new_shape) else: raise ValueError('interpolation should be one ' 'of "nearest" or "bilinear".') if data_format == 'channels_first': x = permute_dimensions(x, [0, 3, 1, 2]) if original_shape[rows] is None: new_height = None else: new_height = original_shape[rows] * height_factor if original_shape[cols] is None: new_width = None else: new_width = original_shape[cols] * width_factor output_shape = (None, new_height, new_width, None) x.set_shape(transpose_shape(output_shape, data_format, spatial_axes=(1, 2))) return x def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): """Resizes the volume contained in a 5D tensor. # Arguments x: Tensor or variable to resize. depth_factor: Positive integer. height_factor: Positive integer. width_factor: Positive integer. data_format: string, `"channels_last"` or `"channels_first"`. # Returns A tensor. 
# Raises ValueError: if `data_format` is neither `"channels_last"` or `"channels_first"`. """ if data_format == 'channels_first': output = repeat_elements(x, depth_factor, axis=2) output = repeat_elements(output, height_factor, axis=3) output = repeat_elements(output, width_factor, axis=4) return output elif data_format == 'channels_last': output = repeat_elements(x, depth_factor, axis=1) output = repeat_elements(output, height_factor, axis=2) output = repeat_elements(output, width_factor, axis=3) return output else: raise ValueError('Unknown data_format: ' + str(data_format)) def repeat_elements(x, rep, axis): """Repeats the elements of a tensor along an axis, like `np.repeat`. If `x` has shape `(s1, s2, s3)` and `axis` is `1`, the output will have shape `(s1, s2 * rep, s3)`. # Arguments x: Tensor or variable. rep: Python integer, number of times to repeat. axis: Axis along which to repeat. # Returns A tensor. """ x_shape = x.get_shape().as_list() # For static axis if x_shape[axis] is not None: # slices along the repeat axis splits = tf.split(value=x, num_or_size_splits=x_shape[axis], axis=axis) # repeat each slice the given number of reps x_rep = [s for s in splits for _ in range(rep)] return concatenate(x_rep, axis) # Here we use tf.tile to mimic behavior of np.repeat so that # we can handle dynamic shapes (that include None). # To do that, we need an auxiliary axis to repeat elements along # it and then merge them along the desired axis. 
# Repeating
    auxiliary_axis = axis + 1
    x_shape = tf.shape(x)
    x_rep = tf.expand_dims(x, axis=auxiliary_axis)
    # Tile `rep` times along the new auxiliary axis only.
    reps = np.ones(len(x.get_shape()) + 1)
    reps[auxiliary_axis] = rep
    x_rep = tf.tile(x_rep, reps)

    # Merging
    # Drop the auxiliary entry so `reps` lines up with the axes of `x`,
    # then scale the dynamic shape by `rep` on `axis`.
    reps = np.delete(reps, auxiliary_axis)
    reps[axis] = rep
    reps = tf.constant(reps, dtype='int32')
    x_shape = x_shape * reps
    x_rep = tf.reshape(x_rep, x_shape)

    # Fix shape representation
    x_shape = x.get_shape().as_list()
    x_rep.set_shape(x_shape)
    x_rep._keras_shape = tuple(x_shape)
    return x_rep


def repeat(x, n):
    """Repeats a 2D tensor.

    if `x` has shape (samples, dim) and `n` is `2`,
    the output will have shape `(samples, 2, dim)`.

    # Arguments
        x: Tensor or variable.
        n: Python integer, number of times to repeat.

    # Returns
        A tensor.
    """
    assert ndim(x) == 2
    x = tf.expand_dims(x, 1)
    pattern = tf.stack([1, n, 1])
    return tf.tile(x, pattern)


def arange(start, stop=None, step=1, dtype='int32'):
    """Creates a 1D tensor containing a sequence of integers.

    The function arguments use the same convention as
    Theano's arange: if only one argument is provided,
    it is in fact the "stop" argument and "start" is 0.

    The default type of the returned tensor is `'int32'` to
    match TensorFlow's default.

    # Arguments
        start: Start value.
        stop: Stop value.
        step: Difference between two successive values.
        dtype: Integer dtype to use.

    # Returns
        An integer tensor.

    """
    # Match the behavior of numpy and Theano by returning an empty sequence.
    # (Only applies to the single-argument form: a negative `start` is
    # replaced by 0.)
    if stop is None:
        try:
            if start < 0:
                start = 0
        except TypeError:
            # Handle case where start is a tensor
            start = tf.cond(start < 0,
                            true_fn=lambda: tf.constant(0, dtype=start.dtype),
                            false_fn=lambda: start)

    result = tf.range(start, limit=stop, delta=step, name='arange')
    # The range is only cast when a dtype other than 'int32' is requested.
    if dtype != 'int32':
        result = cast(result, dtype)
    return result


def tile(x, n):
    """Creates a tensor by tiling `x` by `n`.

    # Arguments
        x: A tensor or variable
        n: A list of integer. The length must be the same as the number of
            dimensions in `x`. A single integer is wrapped in a
            one-element list.

    # Returns
        A tiled tensor.
    """
    if isinstance(n, int):
        n = [n]
    return tf.tile(x, n)


def flatten(x):
    """Flatten a tensor.

    # Arguments
        x: A tensor or variable.

    # Returns
        A tensor, reshaped into 1-D
    """
    return tf.reshape(x, [-1])


def batch_flatten(x):
    """Turn a nD tensor into a 2D tensor with same 0th dimension.

    In other words, it flattens each data samples of a batch.

    # Arguments
        x: A tensor or variable.

    # Returns
        A tensor.
    """
    # Second dimension is the product of all sizes after the 0th; the
    # first is left as -1 for `tf.reshape` to infer.
    x = tf.reshape(x, tf.stack([-1, prod(shape(x)[1:])]))
    return x


def expand_dims(x, axis=-1):
    """Adds a 1-sized dimension at index "axis".

    # Arguments
        x: A tensor or variable.
        axis: Position where to add a new axis.

    # Returns
        A tensor with expanded dimensions.
    """
    return tf.expand_dims(x, axis)


def squeeze(x, axis):
    """Removes a 1-dimension from the tensor at index "axis".

    # Arguments
        x: A tensor or variable.
        axis: Axis to drop.

    # Returns
        A tensor with the same data as `x` but reduced dimensions.
    """
    return tf.squeeze(x, [axis])


def temporal_padding(x, padding=(1, 1)):
    """Pads the middle dimension of a 3D tensor.

    # Arguments
        x: Tensor or variable.
        padding: Tuple of 2 integers, how many zeros to
            add at the start and end of dim 1.

    # Returns
        A padded 3D tensor.
    """
    assert len(padding) == 2
    pattern = [[0, 0], [padding[0], padding[1]], [0, 0]]
    return tf.pad(x, pattern)


def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None):
    """Pads the 2nd and 3rd dimensions of a 4D tensor.

    # Arguments
        x: Tensor or variable.
        padding: Tuple of 2 tuples, padding pattern.
        data_format: string, `"channels_last"` or `"channels_first"`.

    # Returns
        A padded 4D tensor.

    # Raises
        ValueError: if `data_format` is neither
            `"channels_last"` or `"channels_first"`.
    """
    assert len(padding) == 2
    assert len(padding[0]) == 2
    assert len(padding[1]) == 2
    data_format = normalize_data_format(data_format)

    # The pattern is written in channels-last order and then rearranged
    # for the actual data format by `transpose_shape`.
    pattern = [[0, 0],
               list(padding[0]),
               list(padding[1]),
               [0, 0]]
    pattern = transpose_shape(pattern, data_format, spatial_axes=(1, 2))
    return tf.pad(x, pattern)


def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None):
    """Pads 5D tensor with zeros along the depth, height, width dimensions.

    Pads these dimensions with respectively
    "padding[0]", "padding[1]" and "padding[2]" zeros left and right.

    For 'channels_last' data_format,
    the 2nd, 3rd and 4th dimension will be padded.
    For 'channels_first' data_format,
    the 3rd, 4th and 5th dimension will be padded.

    # Arguments
        x: Tensor or variable.
        padding: Tuple of 3 tuples, padding pattern.
        data_format: string, `"channels_last"` or `"channels_first"`.

    # Returns
        A padded 5D tensor.

    # Raises
        ValueError: if `data_format` is neither
            `"channels_last"` or `"channels_first"`.

    """
    assert len(padding) == 3
    assert len(padding[0]) == 2
    assert len(padding[1]) == 2
    assert len(padding[2]) == 2
    data_format = normalize_data_format(data_format)

    pattern = [
        [0, 0],
        [padding[0][0], padding[0][1]],
        [padding[1][0], padding[1][1]],
        [padding[2][0], padding[2][1]],
        [0, 0]
    ]
    pattern = transpose_shape(pattern, data_format, spatial_axes=(1, 2, 3))

    return tf.pad(x, pattern)


def stack(x, axis=0):
    """Stacks a list of rank `R` tensors into a rank `R+1` tensor.

    # Arguments
        x: List of tensors.
        axis: Axis along which to perform stacking.

    # Returns
        A tensor.
    """
    return tf.stack(x, axis=axis)


def one_hot(indices, num_classes):
    """Computes the one-hot representation of an integer tensor.

    # Arguments
        indices: nD integer tensor of shape
            `(batch_size, dim1, dim2, ... dim(n-1))`
        num_classes: Integer, number of classes to consider.

    # Returns
        (n + 1)D one hot representation of the input
        with shape `(batch_size, dim1, dim2, ...
dim(n-1), num_classes)`
    """
    return tf.one_hot(indices, depth=num_classes, axis=-1)


def reverse(x, axes):
    """Reverses a tensor along the specified axes.

    # Arguments
        x: Tensor to reverse.
        axes: Integer or iterable of integers.
            Axes to reverse. A single integer is wrapped in a list.

    # Returns
        A tensor.
    """
    if isinstance(axes, int):
        axes = [axes]
    return tf.reverse(x, axes)


def slice(x, start, size):
    """Extracts a slice from a tensor.

    # Arguments
        x: Input tensor.
        start: Integer list/tuple or tensor
            indicating the start indices of the slice
            along each axis.
        size: Integer list/tuple or tensor
            indicating how many dimensions to slice
            along each axis.

    # Returns
        Tensor `x[start[0]: start[0] + size[0],
                  ...,
                  start[-1]: start[-1] + size[-1]]`
    """
    return tf.slice(x, start, size)


# VALUE MANIPULATION


def get_value(x):
    """Returns the value of a variable.

    # Arguments
        x: input variable.

    # Returns
        A Numpy array.
    """
    return x.eval(session=get_session())


def batch_get_value(ops):
    """Returns the value of more than one tensor variable.

    # Arguments
        ops: list of ops to run.

    # Returns
        A list of Numpy arrays. An empty `ops` yields an empty list
        without running the session.
    """
    if ops:
        return get_session().run(ops)
    else:
        return []


def set_value(x, value):
    """Sets the value of a variable, from a Numpy array.

    # Arguments
        x: Tensor to set to a new value.
        value: Value to set the tensor to, as a Numpy array
            (of the same shape).
    """
    value = np.asarray(value, dtype=dtype(x))
    tf_dtype = tf.as_dtype(x.dtype.name.split('_')[0])
    # The assign placeholder / op pair is created once and cached on the
    # variable itself, then reused by later calls.
    if hasattr(x, '_assign_placeholder'):
        assign_placeholder = x._assign_placeholder
        assign_op = x._assign_op
    else:
        assign_placeholder = tf.placeholder(tf_dtype, shape=value.shape)
        assign_op = x.assign(assign_placeholder)
        x._assign_placeholder = assign_placeholder
        x._assign_op = assign_op
    get_session().run(assign_op, feed_dict={assign_placeholder: value})


def batch_set_value(tuples):
    """Sets the values of many tensor variables at once.

    All assignments are collected and executed in a single session run.

    # Arguments
        tuples: a list of tuples `(tensor, value)`.
            `value` should be a Numpy array.
    """
    if tuples:
        assign_ops = []
        feed_dict = {}
        for x, value in tuples:
            value = np.asarray(value, dtype=dtype(x))
            tf_dtype = tf.as_dtype(x.dtype.name.split('_')[0])
            # Same per-variable placeholder / op cache as in `set_value`.
            if hasattr(x, '_assign_placeholder'):
                assign_placeholder = x._assign_placeholder
                assign_op = x._assign_op
            else:
                assign_placeholder = tf.placeholder(tf_dtype,
                                                    shape=value.shape)
                assign_op = x.assign(assign_placeholder)
                x._assign_placeholder = assign_placeholder
                x._assign_op = assign_op
            assign_ops.append(assign_op)
            feed_dict[assign_placeholder] = value
        get_session().run(assign_ops, feed_dict=feed_dict)


def get_variable_shape(x):
    """Returns the shape of a variable.

    # Arguments
        x: A variable.

    # Returns
        A tuple of integers.
    """
    return int_shape(x)


def print_tensor(x, message=''):
    """Prints `message` and the tensor value when evaluated.

    Note that `print_tensor` returns a new tensor identical to `x`
    which should be used in the following code. Otherwise the
    print operation is not taken into account during evaluation.

    # Example
    ```python
        >>> x = K.print_tensor(x, message="x is: ")
    ```

    # Arguments
        x: Tensor to print.
        message: Message to print jointly with the tensor.

    # Returns
        The same tensor `x`, unchanged.
    """
    return tf.Print(x, [x], message)


# GRAPH MANIPULATION

class Function(object):
    """Runs a computation graph.

    It's possible to pass arguments to `tf.Session.run()` via `session_kwargs`.
    In particular additional operations via `fetches` argument and additional
    tensor substitutions via `feed_dict` arguments. Note that given
    substitutions are merged with substitutions from `inputs`. Even though
    `feed_dict` is passed once in the constructor (called in `model.compile()`)
    we can modify the values in the dictionary. Through this feed_dict we can
    provide additional substitutions besides Keras inputs.

    # Arguments
        inputs: Feed placeholders to the computation graph.
        outputs: Output tensors to fetch.
        updates: Additional update ops to be run at function call.
        name: a name to help users identify what this function does.
session_kwargs: arguments to `tf.Session.run()`: `fetches`, `feed_dict`, `options`, `run_metadata` """ def __init__(self, inputs, outputs, updates=None, name=None, **session_kwargs): updates = updates or [] if not isinstance(inputs, (list, tuple)): raise TypeError('`inputs` to a TensorFlow backend function ' 'should be a list or tuple.') if not isinstance(outputs, (list, tuple)): raise TypeError('`outputs` of a TensorFlow backend function ' 'should be a list or tuple.') if not isinstance(updates, (list, tuple)): raise TypeError('`updates` in a TensorFlow backend function ' 'should be a list or tuple.') self.inputs = list(inputs) self.outputs = list(outputs) with tf.control_dependencies(self.outputs): updates_ops = [] for update in updates: if isinstance(update, tuple): p, new_p = update updates_ops.append(tf.assign(p, new_p)) else: # assumed already an op updates_ops.append(update) self.updates_op = tf.group(*updates_ops) self.name = name # additional tensor substitutions self.feed_dict = session_kwargs.pop('feed_dict', {}) # additional operations self.fetches = session_kwargs.pop('fetches', []) if not isinstance(self.fetches, list): self.fetches = [self.fetches] # The main use case of `fetches` being passed to a model is the ability # to run custom updates # (since the outputs of fetches are never returned). # This requires us to wrap fetches in `identity` ops. 
self.fetches = [tf.identity(x) for x in self.fetches] # self.session_kwargs is used for _legacy_call self.session_kwargs = session_kwargs.copy() self.run_options = session_kwargs.pop('options', None) self.run_metadata = session_kwargs.pop('run_metadata', None) if session_kwargs: raise ValueError('Some keys in session_kwargs are not ' 'supported at this ' 'time: %s', session_kwargs.keys()) self._callable_fn = None self._feed_arrays = None self._feed_symbols = None self._symbol_vals = None self._session = None def _make_callable(self, feed_arrays, feed_symbols, symbol_vals, session): """Generates a callable that runs the graph. # Arguments feed_arrays: List of input tensors to be fed Numpy arrays at runtime. feed_symbols: List of input tensors to be fed symbolic tensors at runtime. symbol_vals: List of symbolic tensors to be fed to `feed_symbols`. session: Session to use to generate the callable. # Returns Function that runs the graph according to the above options. """ # Prepare callable options. callable_opts = config_pb2.CallableOptions() # Handle external-data feed. for x in feed_arrays: callable_opts.feed.append(x.name) if self.feed_dict: for key in sorted(self.feed_dict.keys()): callable_opts.feed.append(key.name) # Handle symbolic feed. for x, y in zip(feed_symbols, symbol_vals): connection = callable_opts.tensor_connection.add() if x.dtype != y.dtype: y = tf.cast(y, dtype=x.dtype) from_tensor = tf_ops._as_graph_element(y) if from_tensor is None: from_tensor = y connection.from_tensor = from_tensor.name # Data tensor connection.to_tensor = x.name # Placeholder # Handle fetches. for x in self.outputs + self.fetches: callable_opts.fetch.append(x.name) # Handle updates. callable_opts.target.append(self.updates_op.name) # Handle run_options. if self.run_options: callable_opts.run_options.CopyFrom(self.run_options) # Create callable. 
callable_fn = session._make_callable_from_options(callable_opts) # Cache parameters corresponding to the generated callable, so that # we can detect future mismatches and refresh the callable. self._callable_fn = callable_fn self._feed_arrays = feed_arrays self._feed_symbols = feed_symbols self._symbol_vals = symbol_vals self._session = session def _call(self, inputs): if not isinstance(inputs, (list, tuple)): raise TypeError('`inputs` should be a list or tuple.') session = get_session() feed_arrays = [] array_vals = [] feed_symbols = [] symbol_vals = [] for tensor, value in zip(self.inputs, inputs): if value is None: continue if is_tensor(value): # Case: feeding symbolic tensor. feed_symbols.append(tensor) symbol_vals.append(value) else: feed_arrays.append(tensor) # We need to do array conversion and type casting # at this level, since # `callable_fn` only supports exact matches. array_vals.append( np.asarray(value, dtype=tf.as_dtype(tensor.dtype).as_numpy_dtype)) if self.feed_dict: for key in sorted(self.feed_dict.keys()): array_vals.append( np.asarray(self.feed_dict[key], dtype=tf.as_dtype(key.dtype).as_numpy_dtype)) # Refresh callable if anything has changed. 
if (self._callable_fn is None or feed_arrays != self._feed_arrays or symbol_vals != self._symbol_vals or feed_symbols != self._feed_symbols or session != self._session): self._make_callable(feed_arrays, feed_symbols, symbol_vals, session) if self.run_metadata: fetched = self._callable_fn(*array_vals, run_metadata=self.run_metadata) else: fetched = self._callable_fn(*array_vals) return fetched[:len(self.outputs)] def _legacy_call(self, inputs): if not isinstance(inputs, (list, tuple)): raise TypeError('`inputs` should be a list or tuple.') feed_dict = self.feed_dict.copy() for tensor, value in zip(self.inputs, inputs): if is_sparse(tensor): sparse_coo = value.tocoo() indices = np.concatenate( (np.expand_dims(sparse_coo.row, 1), np.expand_dims(sparse_coo.col, 1)), 1) value = (indices, sparse_coo.data, sparse_coo.shape) feed_dict[tensor] = value fetches = self.outputs + [self.updates_op] + self.fetches session = get_session() updated = session.run(fetches=fetches, feed_dict=feed_dict, **self.session_kwargs) return updated[:len(self.outputs)] def __call__(self, inputs): if hasattr(get_session(), '_make_callable_from_options'): if py_any(is_sparse(x) for x in self.inputs): if py_any(is_tensor(x) for x in inputs): raise ValueError( 'Feeding from symbolic tensors is not ' 'supported with sparse inputs.') return self._legacy_call(inputs) # callable generated by Session._make_callable_from_options accepts # `run_metadata` keyword argument since TF 1.10 if (self.run_metadata and StrictVersion(tf.__version__.split('-')[0]) < StrictVersion('1.10.0')): if py_any(is_tensor(x) for x in inputs): raise ValueError( 'In order to feed symbolic tensors to a Keras model and set ' '`run_metadata`, you need tensorflow 1.10 or higher.') return self._legacy_call(inputs) return self._call(inputs) else: if py_any(is_tensor(x) for x in inputs): raise ValueError( 'In order to feed symbolic tensors to a Keras model ' 'in TensorFlow, you need tensorflow 1.8 or higher.') return 
self._legacy_call(inputs) def function(inputs, outputs, updates=None, **kwargs): """Instantiates a Keras function. # Arguments inputs: List of placeholder tensors. outputs: List of output tensors. updates: List of update ops. **kwargs: Passed to `tf.Session.run`. # Returns Output values as Numpy arrays. # Raises ValueError: if invalid kwargs are passed in. """ if kwargs: for key in kwargs: if not (has_arg(tf.Session.run, key, True) or has_arg(Function.__init__, key, True)): msg = 'Invalid argument "%s" passed to K.function with TensorFlow backend' % key raise ValueError(msg) return Function(inputs, outputs, updates=updates, **kwargs) def gradients(loss, variables): """Returns the gradients of `loss` w.r.t. `variables`. # Arguments loss: Scalar tensor to minimize. variables: List of variables. # Returns A gradients tensor. """ return tf.gradients(loss, variables, colocate_gradients_with_ops=True) def stop_gradient(variables): """Returns `variables` but with zero gradient w.r.t. every other variable. # Arguments variables: tensor or list of tensors to consider constant with respect to any other variable. # Returns A single tensor or a list of tensors (depending on the passed argument) that has constant gradient with respect to any other variable. """ if isinstance(variables, (list, tuple)): return map(tf.stop_gradient, variables) else: return tf.stop_gradient(variables) # CONTROL FLOW def rnn(step_function, inputs, initial_states, go_backwards=False, mask=None, constants=None, unroll=False, input_length=None): """Iterates over the time dimension of a tensor. # Arguments step_function: Parameters: inputs: Tensor with shape (samples, ...) (no time dimension), representing input for the batch of samples at a certain time step. states: List of tensors. Returns: outputs: Tensor with shape (samples, ...) (no time dimension), new_states: List of tensors, same length and shapes as 'states'. inputs: Tensor of temporal data of shape (samples, time, ...) (at least 3D). 
initial_states: Tensor with shape (samples, ...) (no time dimension), containing the initial values for the states used in the step function. go_backwards: Boolean. If True, do the iteration over the time dimension in reverse order and return the reversed sequence. mask: Binary tensor with shape (samples, time), with a zero for every element that is masked. constants: A list of constant values passed at each step. unroll: Whether to unroll the RNN or to use a symbolic loop (`while_loop` or `scan` depending on backend). input_length: Static number of timesteps in the input. # Returns A tuple, `(last_output, outputs, new_states)`. last_output: The latest output of the rnn, of shape `(samples, ...)` outputs: Tensor with shape `(samples, time, ...)` where each entry `outputs[s, t]` is the output of the step function at time `t` for sample `s`. new_states: List of tensors, latest states returned by the step function, of shape `(samples, ...)`. # Raises ValueError: If input dimension is less than 3. ValueError: If `unroll` is `True` but input timestep is not a fixed number. ValueError: If `mask` is provided (not `None`) but states is not provided (`len(states)` == 0). """ ndim = len(inputs.get_shape()) if ndim < 3: raise ValueError('Input should be at least 3D.') # Transpose to time-major, i.e. # from (batch, time, ...) to (time, batch, ...) 
axes = [1, 0] + list(range(2, ndim)) inputs = tf.transpose(inputs, (axes)) if mask is not None: if mask.dtype != tf.bool: mask = tf.cast(mask, tf.bool) if len(mask.get_shape()) == ndim - 1: mask = expand_dims(mask) mask = tf.transpose(mask, axes) if constants is None: constants = [] global uses_learning_phase uses_learning_phase = False if unroll: if not inputs.get_shape()[0]: raise ValueError('Unrolling requires a ' 'fixed number of timesteps.') states = initial_states successive_states = [] successive_outputs = [] input_list = tf.unstack(inputs) if go_backwards: input_list.reverse() if mask is not None: mask_list = tf.unstack(mask) if go_backwards: mask_list.reverse() for inp, mask_t in zip(input_list, mask_list): output, new_states = step_function(inp, states + constants) if getattr(output, '_uses_learning_phase', False): uses_learning_phase = True # tf.where needs its condition tensor # to be the same shape as its two # result tensors, but in our case # the condition (mask) tensor is # (nsamples, 1), and A and B are (nsamples, ndimensions). # So we need to # broadcast the mask to match the shape of A and B. # That's what the tile call does, # it just repeats the mask along its second dimension # n times. 
tiled_mask_t = tf.tile(mask_t, tf.stack([1, tf.shape(output)[1]])) if not successive_outputs: prev_output = zeros_like(output) else: prev_output = successive_outputs[-1] output = tf.where(tiled_mask_t, output, prev_output) return_states = [] for state, new_state in zip(states, new_states): # (see earlier comment for tile explanation) tiled_mask_t = tf.tile(mask_t, tf.stack([1, tf.shape(new_state)[1]])) return_states.append(tf.where(tiled_mask_t, new_state, state)) states = return_states successive_outputs.append(output) successive_states.append(states) last_output = successive_outputs[-1] new_states = successive_states[-1] outputs = tf.stack(successive_outputs) else: for inp in input_list: output, states = step_function(inp, states + constants) if getattr(output, '_uses_learning_phase', False): uses_learning_phase = True successive_outputs.append(output) successive_states.append(states) last_output = successive_outputs[-1] new_states = successive_states[-1] outputs = tf.stack(successive_outputs) else: if go_backwards: inputs = reverse(inputs, 0) states = tuple(initial_states) time_steps = tf.shape(inputs)[0] outputs, _ = step_function(inputs[0], initial_states + constants) output_ta = tensor_array_ops.TensorArray( dtype=outputs.dtype, size=time_steps, tensor_array_name='output_ta') input_ta = tensor_array_ops.TensorArray( dtype=inputs.dtype, size=time_steps, tensor_array_name='input_ta') input_ta = input_ta.unstack(inputs) time = tf.constant(0, dtype='int32', name='time') if mask is not None: if not states: raise ValueError('No initial states provided! ' 'When using masking in an RNN, you should ' 'provide initial states ' '(and your step function should return ' 'as its first state at time `t` ' 'the output at time `t-1`).') if go_backwards: mask = reverse(mask, 0) mask_ta = tensor_array_ops.TensorArray( dtype=tf.bool, size=time_steps, tensor_array_name='mask_ta') mask_ta = mask_ta.unstack(mask) def _step(time, output_ta_t, *states): """RNN step function. 
# Arguments time: Current timestep value. output_ta_t: TensorArray. *states: List of states. # Returns Tuple: `(time + 1,output_ta_t) + tuple(new_states)` """ current_input = input_ta.read(time) mask_t = mask_ta.read(time) output, new_states = step_function(current_input, tuple(states) + tuple(constants)) if getattr(output, '_uses_learning_phase', False): global uses_learning_phase uses_learning_phase = True for state, new_state in zip(states, new_states): new_state.set_shape(state.get_shape()) tiled_mask_t = tf.tile(mask_t, tf.stack([1, tf.shape(output)[1]])) output = tf.where(tiled_mask_t, output, states[0]) new_states = [ tf.where(tf.tile(mask_t, tf.stack([1, tf.shape(new_states[i])[1]])), new_states[i], states[i]) for i in range(len(states)) ] output_ta_t = output_ta_t.write(time, output) return (time + 1, output_ta_t) + tuple(new_states) else: def _step(time, output_ta_t, *states): """RNN step function. # Arguments time: Current timestep value. output_ta_t: TensorArray. *states: List of states. 
def switch(condition, then_expression, else_expression):
    """Switches between two operations depending on a scalar value.

    Note that both `then_expression` and `else_expression`
    should be symbolic tensors of the *same shape*.

    # Arguments
        condition: tensor (`int` or `bool`).
        then_expression: either a tensor, or a callable that returns a tensor.
        else_expression: either a tensor, or a callable that returns a tensor.

    # Returns
        The selected tensor.

    # Raises
        ValueError: If rank of `condition` is greater than rank of expressions.
    """
    if condition.dtype != tf.bool:
        condition = tf.cast(condition, 'bool')
    cond_ndim = ndim(condition)
    if not cond_ndim:
        # Scalar condition: `tf.cond` needs callables for both branches.
        if not callable(then_expression):
            def then_expression_fn():
                return then_expression
        else:
            then_expression_fn = then_expression
        if not callable(else_expression):
            def else_expression_fn():
                return else_expression
        else:
            else_expression_fn = else_expression
        x = tf.cond(condition,
                    then_expression_fn,
                    else_expression_fn)
    else:
        # tf.where needs its condition tensor
        # to be the same shape as its two
        # result tensors
        if callable(then_expression):
            then_expression = then_expression()
        if callable(else_expression):
            else_expression = else_expression()
        expr_ndim = ndim(then_expression)
        if cond_ndim > expr_ndim:
            raise ValueError('Rank of `condition` should be less than or'
                             ' equal to rank of `then_expression` and '
                             '`else_expression`. ndim(condition)=' +
                             str(cond_ndim) + ', ndim(then_expression)'
                             '=' + str(expr_ndim))
        if cond_ndim > 1:
            # Pad the condition with trailing unit axes, then tile it up
            # to the shape of the expressions.
            ndim_diff = expr_ndim - cond_ndim
            cond_shape = tf.concat([tf.shape(condition), [1] * ndim_diff], axis=0)
            condition = tf.reshape(condition, cond_shape)
            expr_shape = tf.shape(then_expression)
            shape_diff = expr_shape - cond_shape
            tile_shape = tf.where(shape_diff > 0, expr_shape, tf.ones_like(expr_shape))
            condition = tf.tile(condition, tile_shape)
        x = tf.where(condition, then_expression, else_expression)
    return x


def in_train_phase(x, alt, training=None):
    """Selects `x` in train phase, and `alt` otherwise.

    Note that `alt` should have the *same shape* as `x`.

    # Arguments
        x: What to return in train phase
            (tensor or callable that returns a tensor).
        alt: What to return otherwise
            (tensor or callable that returns a tensor).
        training: Optional scalar tensor
            (or Python boolean, or Python integer)
            specifying the learning phase.

    # Returns
        Either `x` or `alt` based on the `training` flag.
        the `training` flag defaults to `K.learning_phase()`.
    """
    if training is None:
        training = learning_phase()
        uses_learning_phase = True
    else:
        uses_learning_phase = False

    # Static Python flag: resolve immediately without building a graph op.
    # `bool` is a subclass of `int`, so this covers True/False and 1/0.
    # Compared by value rather than identity (`is 1`), which depends on
    # interpreter integer caching.
    if isinstance(training, int):
        if training == 1:
            if callable(x):
                return x()
            else:
                return x
        elif training == 0:
            if callable(alt):
                return alt()
            else:
                return alt

    # else: assume learning phase is a placeholder tensor.
    x = switch(training, x, alt)
    if uses_learning_phase:
        x._uses_learning_phase = True
    return x


def in_test_phase(x, alt, training=None):
    """Selects `x` in test phase, and `alt` otherwise.

    Note that `alt` should have the *same shape* as `x`.

    # Arguments
        x: What to return in test phase
            (tensor or callable that returns a tensor).
        alt: What to return otherwise
            (tensor or callable that returns a tensor).
        training: Optional scalar tensor
            (or Python boolean, or Python integer)
            specifying the learning phase.

    # Returns
        Either `x` or `alt` based on `K.learning_phase`.
    """
    return in_train_phase(alt, x, training=training)


# NN OPERATIONS

def relu(x, alpha=0., max_value=None, threshold=0.):
    """Rectified linear unit.

    With default values, it returns element-wise `max(x, 0)`.

    Otherwise, it follows:
    `f(x) = max_value` for `x >= max_value`,
    `f(x) = x` for `threshold <= x < max_value`,
    `f(x) = alpha * (x - threshold)` otherwise.

    # Arguments
        x: A tensor or variable.
        alpha: A scalar, slope of negative section (default=`0.`).
        max_value: float. Saturation threshold.
        threshold: float. Threshold value for thresholded activation.

    # Returns
        A tensor.
    """
    if alpha != 0.:
        if max_value is None and threshold == 0.:
            return tf.nn.leaky_relu(x, alpha=alpha)

        if threshold != 0.:
            negative_part = tf.nn.relu(-x + threshold)
        else:
            negative_part = tf.nn.relu(-x)

    clip_max = max_value is not None

    if threshold != 0:
        # computes x for x > threshold else 0
        x = x * tf.cast(tf.greater(x, threshold), floatx())
    elif max_value == 6:
        # if no threshold, then can use nn.relu6 native TF op for performance
        x = tf.nn.relu6(x)
        clip_max = False
    else:
        x = tf.nn.relu(x)

    if clip_max:
        max_value = _to_tensor(max_value, x.dtype.base_dtype)
        zero = _to_tensor(0., x.dtype.base_dtype)
        x = tf.clip_by_value(x, zero, max_value)

    if alpha != 0:
        alpha = _to_tensor(alpha, x.dtype.base_dtype)
        x -= alpha * negative_part
    return x


def elu(x, alpha=1.):
    """Exponential linear unit.

    # Arguments
        x: A tensor or variable to compute the activation function for.
        alpha: A scalar, slope of negative section.

    # Returns
        A tensor.
    """
    res = tf.nn.elu(x)
    if alpha == 1:
        return res
    else:
        # Only the non-positive part is rescaled by `alpha`.
        return tf.where(x > 0, res, alpha * res)


def softmax(x, axis=-1):
    """Softmax of a tensor.

    # Arguments
        x: A tensor or variable.
        axis: The dimension softmax would be performed on.
            The default is -1 which indicates the last dimension.

    # Returns
        A tensor.
    """
    return tf.nn.softmax(x, axis=axis)


def softplus(x):
    """Softplus of a tensor.

    # Arguments
        x: A tensor or variable.

    # Returns
        A tensor.
    """
    return tf.nn.softplus(x)


def softsign(x):
    """Softsign of a tensor.

    # Arguments
        x: A tensor or variable.

    # Returns
        A tensor.
    """
    return tf.nn.softsign(x)


def categorical_crossentropy(target, output, from_logits=False, axis=-1):
    """Categorical crossentropy between an output tensor and a target tensor.

    # Arguments
        target: A tensor of the same shape as `output`.
        output: A tensor resulting from a softmax
            (unless `from_logits` is True, in which
            case `output` is expected to be the logits).
        from_logits: Boolean, whether `output` is the
            result of a softmax, or is a tensor of logits.
        axis: Int specifying the channels axis. `axis=-1`
            corresponds to data format `channels_last`,
            and `axis=1` corresponds to data format
            `channels_first`.

    # Returns
        Output tensor.

    # Raises
        ValueError: if `axis` is neither -1 nor one of
            the axes of `output`.
    """
    output_dimensions = list(range(len(output.get_shape())))
    if axis != -1 and axis not in output_dimensions:
        raise ValueError(
            '{}{}{}'.format(
                'Unexpected channels axis {}. '.format(axis),
                'Expected to be -1 or one of the axes of `output`, ',
                'which has {} dimensions.'.format(len(output.get_shape()))))
    # Note: tf.nn.softmax_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # scale preds so that the class probas of each sample sum to 1
        output /= tf.reduce_sum(output, axis, True)
        # manual computation of crossentropy
        _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1. - _epsilon)
        return - tf.reduce_sum(target * tf.log(output), axis)
    else:
        # Forward the class axis so the logits path honors `axis` the same
        # way the probability path above does (`dim` is the TF 1.x name
        # of the class dimension argument).
        return tf.nn.softmax_cross_entropy_with_logits(labels=target,
                                                       logits=output,
                                                       dim=axis)
'.format(axis), 'Expected to be -1 or one of the axes of `output`, ', 'which has {} dimensions.'.format(len(output.get_shape())))) # If the channels are not in the last axis, move them to be there: if axis != -1 and axis != output_dimensions[-1]: permutation = output_dimensions[:axis] + output_dimensions[axis + 1:] permutation += [axis] output = tf.transpose(output, perm=permutation) # Note: tf.nn.sparse_softmax_cross_entropy_with_logits # expects logits, Keras expects probabilities. if not from_logits: _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype) output = tf.clip_by_value(output, _epsilon, 1 - _epsilon) output = tf.log(output) output_shape = output.get_shape() targets = cast(flatten(target), 'int64') logits = tf.reshape(output, [-1, int(output_shape[-1])]) res = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=targets, logits=logits) if len(output_shape) >= 3: # if our output includes timestep dimension # or spatial dimensions we need to reshape return tf.reshape(res, tf.shape(output)[:-1]) else: return res def binary_crossentropy(target, output, from_logits=False): """Binary crossentropy between an output tensor and a target tensor. # Arguments target: A tensor with the same shape as `output`. output: A tensor. from_logits: Whether `output` is expected to be a logits tensor. By default, we consider that `output` encodes a probability distribution. # Returns A tensor. """ # Note: tf.nn.sigmoid_cross_entropy_with_logits # expects logits, Keras expects probabilities. if not from_logits: # transform back to logits _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype) output = tf.clip_by_value(output, _epsilon, 1 - _epsilon) output = tf.log(output / (1 - output)) return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output) def sigmoid(x): """Element-wise sigmoid. # Arguments x: A tensor or variable. # Returns A tensor. """ return tf.nn.sigmoid(x) def hard_sigmoid(x): """Segment-wise linear approximation of sigmoid. 
Faster than sigmoid. Returns `0.` if `x < -2.5`, `1.` if `x > 2.5`. In `-2.5 <= x <= 2.5`, returns `0.2 * x + 0.5`. # Arguments x: A tensor or variable. # Returns A tensor. """ x = (0.2 * x) + 0.5 zero = _to_tensor(0., x.dtype.base_dtype) one = _to_tensor(1., x.dtype.base_dtype) x = tf.clip_by_value(x, zero, one) return x def tanh(x): """Element-wise tanh. # Arguments x: A tensor or variable. # Returns A tensor. """ return tf.nn.tanh(x) def dropout(x, level, noise_shape=None, seed=None): """Sets entries in `x` to zero at random, while scaling the entire tensor. # Arguments x: tensor level: fraction of the entries in the tensor that will be set to 0. noise_shape: shape for randomly generated keep/drop flags, must be broadcastable to the shape of `x` seed: random seed to ensure determinism. # Returns A tensor. """ retain_prob = 1. - level if seed is None: seed = np.random.randint(10e6) # the dummy 1. works around a TF bug # (float32_ref vs. float32 incompatibility) return tf.nn.dropout(x * 1., retain_prob, noise_shape, seed=seed) def l2_normalize(x, axis=None): """Normalizes a tensor wrt the L2 norm alongside the specified axis. # Arguments x: Tensor or variable. axis: axis along which to perform normalization. # Returns A tensor. """ return tf.nn.l2_normalize(x, axis=axis) def in_top_k(predictions, targets, k): """Returns whether the `targets` are in the top `k` `predictions`. # Arguments predictions: A tensor of shape `(batch_size, classes)` and type `float32`. targets: A 1D tensor of length `batch_size` and type `int32` or `int64`. k: An `int`, number of top elements to consider. # Returns A 1D tensor of length `batch_size` and type `bool`. `output[i]` is `True` if `predictions[i, targets[i]]` is within top-`k` values of `predictions[i]`. """ return tf.nn.in_top_k(predictions, targets, k) # CONVOLUTIONS def _preprocess_conv1d_input(x, data_format): """Transpose and cast the input before the conv1d. # Arguments x: input tensor. 
data_format: string, `"channels_last"` or `"channels_first"`. # Returns A tensor. """ # tensorflow doesn't support float64 for conv layer before 1.8.0 if (dtype(x) == 'float64' and StrictVersion(tf.__version__.split('-')[0]) < StrictVersion('1.8.0')): x = tf.cast(x, 'float32') tf_data_format = 'NWC' # to pass TF Conv2dNative operations if data_format == 'channels_first': if not _has_nchw_support(): x = tf.transpose(x, (0, 2, 1)) # NCW -> NWC else: tf_data_format = 'NCW' return x, tf_data_format def _preprocess_conv2d_input(x, data_format, force_transpose=False): """Transpose and cast the input before the conv2d. # Arguments x: input tensor. data_format: string, `"channels_last"` or `"channels_first"`. force_transpose: boolean, whether force to transpose input from NCHW to NHWC if the `data_format` is `"channels_first"`. # Returns A tensor. """ # tensorflow doesn't support float64 for conv layer before 1.8.0 if (dtype(x) == 'float64' and StrictVersion(tf.__version__.split('-')[0]) < StrictVersion('1.8.0')): x = tf.cast(x, 'float32') tf_data_format = 'NHWC' if data_format == 'channels_first': if not _has_nchw_support() or force_transpose: x = tf.transpose(x, (0, 2, 3, 1)) # NCHW -> NHWC else: tf_data_format = 'NCHW' return x, tf_data_format def _preprocess_conv3d_input(x, data_format): """Transpose and cast the input before the conv3d. # Arguments x: input tensor. data_format: string, `"channels_last"` or `"channels_first"`. # Returns A tensor. """ # tensorflow doesn't support float64 for conv layer before 1.8.0 if (dtype(x) == 'float64' and StrictVersion(tf.__version__.split('-')[0]) < StrictVersion('1.8.0')): x = tf.cast(x, 'float32') tf_data_format = 'NDHWC' if data_format == 'channels_first': if not _has_nchw_support(): x = tf.transpose(x, (0, 2, 3, 4, 1)) else: tf_data_format = 'NCDHW' return x, tf_data_format def _preprocess_padding(padding): """Convert keras' padding to tensorflow's padding. # Arguments padding: string, `"same"` or `"valid"`. 
# Returns a string, `"SAME"` or `"VALID"`. # Raises ValueError: if `padding` is invalid. """ if padding == 'same': padding = 'SAME' elif padding == 'valid': padding = 'VALID' else: raise ValueError('Invalid padding: ' + str(padding)) return padding def conv1d(x, kernel, strides=1, padding='valid', data_format=None, dilation_rate=1): """1D convolution. # Arguments x: Tensor or variable. kernel: kernel tensor. strides: stride integer. padding: string, `"same"`, `"causal"` or `"valid"`. data_format: string, `"channels_last"` or `"channels_first"`. dilation_rate: integer dilate rate. # Returns A tensor, result of 1D convolution. # Raises ValueError: If `data_format` is neither `"channels_last"` nor `"channels_first"`. """ data_format = normalize_data_format(data_format) kernel_shape = kernel.get_shape().as_list() if padding == 'causal': if data_format != 'channels_last': raise ValueError('When using causal padding in `conv1d`, ' '`data_format` must be "channels_last" ' '(temporal data).') # causal (dilated) convolution: left_pad = dilation_rate * (kernel_shape[0] - 1) x = temporal_padding(x, (left_pad, 0)) padding = 'valid' padding = _preprocess_padding(padding) x, tf_data_format = _preprocess_conv1d_input(x, data_format) x = tf.nn.convolution( input=x, filter=kernel, dilation_rate=(dilation_rate,), strides=(strides,), padding=padding, data_format=tf_data_format) if data_format == 'channels_first' and tf_data_format == 'NWC': x = tf.transpose(x, (0, 2, 1)) # NWC -> NCW return x def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1)): """2D convolution. # Arguments x: Tensor or variable. kernel: kernel tensor. strides: strides tuple. padding: string, `"same"` or `"valid"`. data_format: string, `"channels_last"` or `"channels_first"`. Whether to use Theano or TensorFlow/CNTK data format for inputs/kernels/outputs. dilation_rate: tuple of 2 integers. # Returns A tensor, result of 2D convolution. 
def conv2d_transpose(x, kernel, output_shape, strides=(1, 1),
                     padding='valid', data_format=None, dilation_rate=(1, 1)):
    """2D deconvolution (i.e. transposed convolution).

    # Arguments
        x: Tensor or variable.
        kernel: kernel tensor.
        output_shape: 1D int tensor for the output shape, or a
            list/tuple of 4 entries. In a list/tuple the first (batch)
            entry may be `None`, in which case the batch size of `x`
            is used.
        strides: strides tuple.
        padding: string, `"same"` or `"valid"`.
        data_format: string, `"channels_last"` or `"channels_first"`.
            Whether to use Theano or TensorFlow/CNTK data format
            for inputs/kernels/outputs.
        dilation_rate: tuple of 2 integers.

    # Returns
        A tensor, result of transposed 2D convolution.

    # Raises
        ValueError: If `data_format` is neither
            `"channels_last"` nor `"channels_first"`, or if the two
            entries of a non-unit `dilation_rate` differ.
    """
    data_format = normalize_data_format(data_format)
    # Normalize to tuples so that lists behave like tuples below
    # (`[1, 1] != (1, 1)` is True in Python, and tuple + list fails).
    strides = tuple(strides)
    dilation_rate = tuple(dilation_rate)

    # tf.nn.atrous_conv2d_transpose input only supports NHWC format
    if data_format == 'channels_first' and dilation_rate != (1, 1):
        force_transpose = True
    else:
        force_transpose = False

    x, tf_data_format = _preprocess_conv2d_input(x, data_format, force_transpose)

    if isinstance(output_shape, list):
        output_shape = tuple(output_shape)
    if data_format == 'channels_first' and tf_data_format == 'NHWC':
        # The input was transposed to NHWC, so permute the shape likewise.
        output_shape = (output_shape[0],
                        output_shape[2],
                        output_shape[3],
                        output_shape[1])
    if isinstance(output_shape, tuple):
        # Resolve an unknown batch size *before* stacking: `tf.stack`
        # cannot convert `None`.
        if output_shape[0] is None:
            output_shape = (tf.shape(x)[0],) + output_shape[1:]
        output_shape = tf.stack(list(output_shape))

    padding = _preprocess_padding(padding)
    if tf_data_format == 'NHWC':
        strides = (1,) + strides + (1,)
    else:
        strides = (1, 1) + strides

    if dilation_rate == (1, 1):
        x = tf.nn.conv2d_transpose(x, kernel, output_shape, strides,
                                   padding=padding,
                                   data_format=tf_data_format)
    else:
        # Raised explicitly (not `assert`) so the check survives `python -O`.
        if dilation_rate[0] != dilation_rate[1]:
            raise ValueError('`conv2d_transpose` requires equal dilation '
                             'rates along both spatial axes, got: ' +
                             str(dilation_rate))
        # Note: `strides` is not forwarded on this path.
        x = tf.nn.atrous_conv2d_transpose(
            x, kernel, output_shape, dilation_rate[0], padding)

    if data_format == 'channels_first' and tf_data_format == 'NHWC':
        x = tf.transpose(x, (0, 3, 1, 2))  # NHWC -> NCHW
    return x


def separable_conv1d(x, depthwise_kernel, pointwise_kernel, strides=1,
                     padding='valid', data_format=None, dilation_rate=1):
    """1D convolution with separable filters.

    # Arguments
        x: input tensor
        depthwise_kernel: convolution kernel for the depthwise convolution.
        pointwise_kernel: kernel for the 1x1 convolution.
        strides: stride integer (or a length-1 tuple/list).
        padding: string, `"same"` or `"valid"`.
        data_format: string, `"channels_last"` or `"channels_first"`.
        dilation_rate: integer dilation rate (or a length-1 tuple/list).

    # Returns
        Output tensor.

    # Raises
        ValueError: If `data_format` is neither
            `"channels_last"` nor `"channels_first"`.
    """
    data_format = normalize_data_format(data_format)
    # Work on tuples so the concatenations below accept lists as well.
    if isinstance(strides, int):
        strides = (strides,)
    else:
        strides = tuple(strides)
    if isinstance(dilation_rate, int):
        dilation_rate = (dilation_rate,)
    else:
        dilation_rate = tuple(dilation_rate)

    x, tf_data_format = _preprocess_conv1d_input(x, data_format)
    # The 1D case is computed as a 2D separable convolution over an
    # inserted unit spatial axis.
    if tf_data_format == 'NWC':
        tf_data_format = 'NHWC'
    else:
        tf_data_format = 'NCHW'
    padding = _preprocess_padding(padding)
    if tf_data_format == 'NHWC':
        spatial_start_dim = 1
        strides = (1,) + strides * 2 + (1,)
    else:
        spatial_start_dim = 2
        strides = (1, 1) + strides * 2
    x = tf.expand_dims(x, spatial_start_dim)
    depthwise_kernel = tf.expand_dims(depthwise_kernel, 0)
    pointwise_kernel = tf.expand_dims(pointwise_kernel, 0)
    dilation_rate = (1,) + dilation_rate

    x = tf.nn.separable_conv2d(x, depthwise_kernel, pointwise_kernel,
                               strides=strides,
                               padding=padding,
                               rate=dilation_rate,
                               data_format=tf_data_format)

    # Drop the inserted unit axis again.
    x = tf.squeeze(x, [spatial_start_dim])

    if data_format == 'channels_first' and tf_data_format == 'NHWC':
        x = tf.transpose(x, (0, 2, 1))  # NWC -> NCW

    return x


def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1),
                     padding='valid', data_format=None, dilation_rate=(1, 1)):
    """2D convolution with separable filters.

    # Arguments
        x: input tensor
        depthwise_kernel: convolution kernel for the depthwise convolution.
        pointwise_kernel: kernel for the 1x1 convolution.
        strides: strides tuple (length 2).
        padding: string, `"same"` or `"valid"`.
        data_format: string, `"channels_last"` or `"channels_first"`.
        dilation_rate: tuple of integers,
            dilation rates for the separable convolution.

    # Returns
        Output tensor.

    # Raises
        ValueError: If `data_format` is neither
            `"channels_last"` nor `"channels_first"`.
    """
    data_format = normalize_data_format(data_format)

    x, tf_data_format = _preprocess_conv2d_input(x, data_format)
    padding = _preprocess_padding(padding)
    # `tuple(...)` lets list strides be concatenated like tuple strides.
    if tf_data_format == 'NHWC':
        strides = (1,) + tuple(strides) + (1,)
    else:
        strides = (1, 1) + tuple(strides)

    x = tf.nn.separable_conv2d(x, depthwise_kernel, pointwise_kernel,
                               strides=strides,
                               padding=padding,
                               rate=dilation_rate,
                               data_format=tf_data_format)
    if data_format == 'channels_first' and tf_data_format == 'NHWC':
        x = tf.transpose(x, (0, 3, 1, 2))  # NHWC -> NCHW
    return x


def depthwise_conv2d(x, depthwise_kernel, strides=(1, 1), padding='valid',
                     data_format=None, dilation_rate=(1, 1)):
    """Depthwise 2D convolution.

    # Arguments
        x: input tensor
        depthwise_kernel: convolution kernel for the depthwise convolution.
        strides: strides tuple (length 2).
        padding: string, `"same"` or `"valid"`.
        data_format: string, `"channels_last"` or `"channels_first"`.
        dilation_rate: tuple of integers,
            dilation rates for the depthwise convolution.

    # Returns
        Output tensor.

    # Raises
        ValueError: If `data_format` is neither
            `"channels_last"` nor `"channels_first"`.
    """
    data_format = normalize_data_format(data_format)

    x, tf_data_format = _preprocess_conv2d_input(x, data_format)
    padding = _preprocess_padding(padding)
    # `tuple(...)` lets list strides be concatenated like tuple strides.
    if tf_data_format == 'NHWC':
        strides = (1,) + tuple(strides) + (1,)
    else:
        strides = (1, 1) + tuple(strides)

    x = tf.nn.depthwise_conv2d(x, depthwise_kernel,
                               strides=strides,
                               padding=padding,
                               rate=dilation_rate,
                               data_format=tf_data_format)
    if data_format == 'channels_first' and tf_data_format == 'NHWC':
        x = tf.transpose(x, (0, 3, 1, 2))  # NHWC -> NCHW
    return x
dilation_rate: tuple of 3 integers. # Returns A tensor, result of 3D convolution. # Raises ValueError: If `data_format` is neither `"channels_last"` nor `"channels_first"`. """ data_format = normalize_data_format(data_format) x, tf_data_format = _preprocess_conv3d_input(x, data_format) padding = _preprocess_padding(padding) x = tf.nn.convolution( input=x, filter=kernel, dilation_rate=dilation_rate, strides=strides, padding=padding, data_format=tf_data_format) if data_format == 'channels_first' and tf_data_format == 'NDHWC': x = tf.transpose(x, (0, 4, 1, 2, 3)) return x def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1), padding='valid', data_format=None): """3D deconvolution (i.e. transposed convolution). # Arguments x: input tensor. kernel: kernel tensor. output_shape: 1D int tensor for the output shape. strides: strides tuple. padding: string, "same" or "valid". data_format: string, `"channels_last"` or `"channels_first"`. Whether to use Theano or TensorFlow/CNTK data format for inputs/kernels/outputs. # Returns A tensor, result of transposed 3D convolution. # Raises ValueError: If `data_format` is neither `"channels_last"` nor `"channels_first"`. 
""" data_format = normalize_data_format(data_format) if isinstance(output_shape, (tuple, list)): output_shape = tf.stack(output_shape) x, tf_data_format = _preprocess_conv3d_input(x, data_format) if data_format == 'channels_first' and tf_data_format == 'NDHWC': output_shape = (output_shape[0], output_shape[2], output_shape[3], output_shape[4], output_shape[1]) if output_shape[0] is None: output_shape = (tf.shape(x)[0],) + tuple(output_shape[1:]) output_shape = tf.stack(list(output_shape)) padding = _preprocess_padding(padding) if tf_data_format == 'NDHWC': strides = (1,) + strides + (1,) else: strides = (1, 1) + strides x = tf.nn.conv3d_transpose(x, kernel, output_shape, strides, padding=padding, data_format=tf_data_format) if data_format == 'channels_first' and tf_data_format == 'NDHWC': x = tf.transpose(x, (0, 4, 1, 2, 3)) return x def pool2d(x, pool_size, strides=(1, 1), padding='valid', data_format=None, pool_mode='max'): """2D Pooling. # Arguments x: Tensor or variable. pool_size: tuple of 2 integers. strides: tuple of 2 integers. padding: string, `"same"` or `"valid"`. data_format: string, `"channels_last"` or `"channels_first"`. pool_mode: string, `"max"` or `"avg"`. # Returns A tensor, result of 2D pooling. # Raises ValueError: if `data_format` is neither `"channels_last"` or `"channels_first"`. ValueError: if `pool_mode` is neither `"max"` or `"avg"`. 
""" data_format = normalize_data_format(data_format) x, tf_data_format = _preprocess_conv2d_input(x, data_format) padding = _preprocess_padding(padding) if tf_data_format == 'NHWC': strides = (1,) + strides + (1,) pool_size = (1,) + pool_size + (1,) else: strides = (1, 1) + strides pool_size = (1, 1) + pool_size if pool_mode == 'max': x = tf.nn.max_pool(x, pool_size, strides, padding=padding, data_format=tf_data_format) elif pool_mode == 'avg': x = tf.nn.avg_pool(x, pool_size, strides, padding=padding, data_format=tf_data_format) else: raise ValueError('Invalid pool_mode: ' + str(pool_mode)) if data_format == 'channels_first' and tf_data_format == 'NHWC': x = tf.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW return x def pool3d(x, pool_size, strides=(1, 1, 1), padding='valid', data_format=None, pool_mode='max'): """3D Pooling. # Arguments x: Tensor or variable. pool_size: tuple of 3 integers. strides: tuple of 3 integers. padding: string, `"same"` or `"valid"`. data_format: string, `"channels_last"` or `"channels_first"`. pool_mode: string, `"max"` or `"avg"`. # Returns A tensor, result of 3D pooling. # Raises ValueError: if `data_format` is neither `"channels_last"` or `"channels_first"`. ValueError: if `pool_mode` is neither `"max"` or `"avg"`. 
def bias_add(x, bias, data_format=None):
    """Adds a bias vector to a tensor.

    # Arguments
        x: Tensor or variable.
        bias: Bias tensor to add.
        data_format: string, `"channels_last"` or `"channels_first"`.

    # Returns
        Output tensor.

    # Raises
        ValueError: In one of the two cases below:
                    1. invalid `data_format` argument.
                    2. invalid bias shape.
                       the bias should be either a vector or
                       a tensor with ndim(x) - 1 dimension
    """
    data_format = normalize_data_format(data_format)
    bias_shape = int_shape(bias)
    bias_ndim = len(bias_shape)
    x_ndim = ndim(x)
    if bias_ndim != 1 and bias_ndim != x_ndim - 1:
        # The accepted rank is `ndim(x) - 1`; report that value so the
        # message agrees with the condition that triggered it.
        raise ValueError('Unexpected bias dimensions %d, expect to be 1 or %d dimensions'
                         % (bias_ndim, x_ndim - 1))
    if x_ndim == 5:
        if bias_ndim == 1:
            new_shape = (1, 1, 1, 1, bias_shape[0])
        else:
            new_shape = (1,) + bias_shape
        new_shape = transpose_shape(new_shape, data_format, spatial_axes=(1, 2, 3))
        x += reshape(bias, new_shape)
    elif x_ndim == 4:
        if data_format == 'channels_first':
            if bias_ndim == 1:
                if _has_nchw_support():
                    x = tf.nn.bias_add(x, bias, data_format='NCHW')
                else:
                    x += reshape(bias, (1, bias_shape[0], 1, 1))
            else:
                # Move the trailing channel axis of the bias in front of
                # its two leading axes.
                x += reshape(bias, (1, bias_shape[2]) + bias_shape[:2])
        elif data_format == 'channels_last':
            if bias_ndim == 1:
                x = tf.nn.bias_add(x, bias, data_format='NHWC')
            else:
                x += reshape(bias, (1,) + bias_shape)
    elif x_ndim == 3:
        if bias_ndim == 1:
            new_shape = (1, 1, bias_shape[0])
        else:
            new_shape = (1,) + bias_shape
        new_shape = transpose_shape(new_shape, data_format, spatial_axes=(1,))
        x += reshape(bias, new_shape)
    else:
        x = tf.nn.bias_add(x, bias)
    return x


# RANDOMNESS

def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
    """Returns a tensor with normal distribution of values.

    # Arguments
        shape: A tuple of integers, the shape of tensor to create.
        mean: A float, mean of the normal distribution to draw samples.
        stddev: A float, standard deviation of the normal distribution
            to draw samples.
        dtype: String, dtype of returned tensor. Defaults to `floatx()`.
        seed: Integer, random seed. Drawn from the Numpy RNG when `None`.

    # Returns
        A tensor.
    """
    if dtype is None:
        dtype = floatx()
    if seed is None:
        seed = np.random.randint(10e6)
    return tf.random_normal(shape, mean=mean, stddev=stddev,
                            dtype=dtype, seed=seed)


def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None):
    """Returns a tensor with uniform distribution of values.

    # Arguments
        shape: A tuple of integers, the shape of tensor to create.
        minval: A float, lower boundary of the uniform distribution
            to draw samples.
        maxval: A float, upper boundary of the uniform distribution
            to draw samples.
        dtype: String, dtype of returned tensor. Defaults to `floatx()`.
        seed: Integer, random seed. Drawn from the Numpy RNG when `None`.

    # Returns
        A tensor.
    """
    if dtype is None:
        dtype = floatx()
    if seed is None:
        seed = np.random.randint(10e6)
    return tf.random_uniform(shape, minval=minval, maxval=maxval,
                             dtype=dtype, seed=seed)


def random_binomial(shape, p=0.0, dtype=None, seed=None):
    """Returns a tensor with random binomial distribution of values.

    # Arguments
        shape: A tuple of integers, the shape of tensor to create.
        p: A float, `0. <= p <= 1`, probability of binomial distribution.
        dtype: String, dtype of returned tensor. Defaults to `floatx()`.
        seed: Integer, random seed. Drawn from the Numpy RNG when `None`.

    # Returns
        A tensor.
    """
    if dtype is None:
        dtype = floatx()
    if seed is None:
        seed = np.random.randint(10e6)
    # A uniform sample that is `<= p` becomes 1, every other entry 0.
    return tf.where(tf.random_uniform(shape, dtype=dtype, seed=seed) <= p,
                    tf.ones(shape, dtype=dtype),
                    tf.zeros(shape, dtype=dtype))


def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
    """Returns a tensor with truncated random normal distribution of values.

    The generated values follow a normal distribution
    with specified mean and standard deviation,
    except that values whose magnitude is more than
    two standard deviations from the mean are dropped and re-picked.

    # Arguments
        shape: A tuple of integers, the shape of tensor to create.
        mean: Mean of the values.
        stddev: Standard deviation of the values.
        dtype: String, dtype of returned tensor. Defaults to `floatx()`.
        seed: Integer, random seed. Drawn from the Numpy RNG when `None`.

    # Returns
        A tensor.
    """
    if dtype is None:
        dtype = floatx()
    if seed is None:
        seed = np.random.randint(10e6)
    return tf.truncated_normal(shape, mean, stddev, dtype=dtype, seed=seed)


# CTC
# TensorFlow has a native implementation, but it uses sparse tensors
# and therefore requires a wrapper for Keras.
# The functions below convert
# dense to sparse tensors and also wrap up the beam search code that is
# in TensorFlow's CTC implementation.


def ctc_label_dense_to_sparse(labels, label_lengths):
    """Converts CTC labels from dense to sparse.

    # Arguments
        labels: dense CTC labels.
        label_lengths: length of the labels.

    # Returns
        A sparse tensor representation of the labels.
    """
    label_shape = tf.shape(labels)
    num_batches_tns = tf.stack([label_shape[0]])
    max_num_labels_tns = tf.stack([label_shape[1]])

    # Scan step: compares `range(label_shape[1])` against the current
    # label length, giving one boolean row per scanned length.
    def range_less_than(_, current_input):
        return tf.expand_dims(tf.range(label_shape[1]), 0) < tf.fill(
            max_num_labels_tns, current_input)

    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
    dense_mask = functional_ops.scan(range_less_than, label_lengths,
                                     initializer=init, parallel_iterations=1)
    # Drop the singleton axis introduced by `expand_dims` above.
    dense_mask = dense_mask[:, 0, :]

    # Index of every entry along the label axis, masked to valid entries.
    label_array = tf.reshape(tf.tile(tf.range(label_shape[1]), num_batches_tns),
                             label_shape)
    label_ind = tf.boolean_mask(label_array, dense_mask)

    # Index of every entry along the batch axis, masked the same way.
    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(label_shape[0]),
                                                  max_num_labels_tns), reverse(label_shape, 0)))
    batch_ind = tf.boolean_mask(batch_array, dense_mask)
    # Pair the two index vectors up as rows of `(batch, label)` indices.
    indices = tf.transpose(tf.reshape(concatenate([batch_ind, label_ind], axis=0), [2, -1]))

    vals_sparse = tf.gather_nd(labels, indices)

    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))


def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    """Runs CTC loss algorithm on each batch element.

    # Arguments
        y_true: tensor `(samples, max_string_length)`
            containing the truth labels.
        y_pred: tensor `(samples, time_steps, num_categories)`
            containing the prediction, or output of the softmax.
        input_length: tensor `(samples, 1)` containing the sequence length for
            each batch item in `y_pred`.
        label_length: tensor `(samples, 1)` containing the sequence length for
            each batch item in `y_true`.

    # Returns
        Tensor with shape (samples,1) containing the
            CTC loss of each element.
    """
    # The length tensors carry a trailing axis that is squeezed away.
    label_length = tf.to_int32(tf.squeeze(label_length, axis=-1))
    input_length = tf.to_int32(tf.squeeze(input_length, axis=-1))
    sparse_labels = tf.to_int32(ctc_label_dense_to_sparse(y_true, label_length))

    # Swap the first two axes and take the log; `epsilon()` keeps the
    # argument of the log strictly positive.
    y_pred = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + epsilon())

    return tf.expand_dims(ctc.ctc_loss(inputs=y_pred,
                                       labels=sparse_labels,
                                       sequence_length=input_length), 1)


def ctc_decode(y_pred, input_length, greedy=True, beam_width=100,
               top_paths=1):
    """Decodes the output of a softmax.

    Can use either greedy search (also known as best path)
    or a constrained dictionary search.

    # Arguments
        y_pred: tensor `(samples, time_steps, num_categories)`
            containing the prediction, or output of the softmax.
        input_length: tensor `(samples, )` containing the sequence length for
            each batch item in `y_pred`.
        greedy: perform much faster best-path search if `true`.
            This does not use a dictionary.
        beam_width: if `greedy` is `false`: a beam search decoder will be used
            with a beam of this width.
        top_paths: if `greedy` is `false`,
            how many of the most probable paths will be returned.

    # Returns
        Tuple:
            List: if `greedy` is `true`, returns a list of one element that
                contains the decoded sequence.
                If `false`, returns the `top_paths` most probable
                decoded sequences.
                Important: blank labels are returned as `-1`.
            Tensor `(top_paths, )` that contains
                the log probability of each decoded sequence.
    """
    # Same preprocessing as `ctc_batch_cost`: swap the first two axes and
    # take the log of the (epsilon-shifted) predictions.
    y_pred = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + epsilon())
    input_length = tf.to_int32(input_length)

    if greedy:
        (decoded, log_prob) = ctc.ctc_greedy_decoder(
            inputs=y_pred,
            sequence_length=input_length)
    else:
        (decoded, log_prob) = ctc.ctc_beam_search_decoder(
            inputs=y_pred,
            sequence_length=input_length, beam_width=beam_width,
            top_paths=top_paths)

    # Densify each sparse result, filling unset positions with -1.
    decoded_dense = [tf.sparse_to_dense(st.indices, st.dense_shape, st.values, default_value=-1)
                     for st in decoded]
    return (decoded_dense, log_prob)


# HIGH ORDER FUNCTIONS

def map_fn(fn, elems, name=None, dtype=None):
    """Map the function fn over the elements elems and return the outputs.

    # Arguments
        fn: Callable that will be called upon each element in elems
        elems: tensor
        name: A string name for the map node in the graph
        dtype: Output data type.

    # Returns
        Tensor with dtype `dtype`.
    """
    return tf.map_fn(fn, elems, name=name, dtype=dtype)


def foldl(fn, elems, initializer=None, name=None):
    """Reduce elems using fn to combine them from left to right.

    # Arguments
        fn: Callable that will be called upon each element in elems and an
            accumulator, for instance `lambda acc, x: acc + x`
        elems: tensor
        initializer: The first value used (`elems[0]` in case of None)
        name: A string name for the foldl node in the graph

    # Returns
        Tensor with same type and shape as `initializer`.
    """
    return tf.foldl(fn, elems, initializer=initializer, name=name)


def foldr(fn, elems, initializer=None, name=None):
    """Reduce elems using fn to combine them from right to left.

    # Arguments
        fn: Callable that will be called upon each element in elems and an
            accumulator, for instance `lambda acc, x: acc + x`
        elems: tensor
        initializer: The first value used (`elems[-1]` in case of None)
        name: A string name for the foldr node in the graph

    # Returns
        Tensor with same type and shape as `initializer`.
    """
    return tf.foldr(fn, elems, initializer=initializer, name=name)


def local_conv1d(inputs, kernel, kernel_size, strides, data_format=None):
    """Apply 1D conv with un-shared weights.

    # Arguments
        inputs: 3D tensor with shape: (batch_size, steps, input_dim)
        kernel: the unshared weight for convolution,
                with shape (output_length, feature_dim, filters)
        kernel_size: a tuple of a single integer,
                     specifying the length of the 1D convolution window
        strides: a tuple of a single integer,
                 specifying the stride length of the convolution
        data_format: the data format, channels_first or channels_last.
                     It is validated here but not otherwise used by
                     this function.

    # Returns
        the tensor after 1d conv with un-shared weights,
        with shape (batch_size, output_length, filters)

    # Raises
        ValueError: If `data_format` is neither
            `"channels_last"` nor `"channels_first"`.
    """
    data_format = normalize_data_format(data_format)

    stride = strides[0]
    kernel_shape = int_shape(kernel)
    output_length, feature_dim, filters = kernel_shape

    # One flattened input window per output position.
    xs = []
    for i in range(output_length):
        slice_length = py_slice(i * stride,
                                i * stride + kernel_size[0])
        xs.append(reshape(inputs[:, slice_length, :],
                          (1, -1, feature_dim)))
    x_aggregate = concatenate(xs, axis=0)
    # Shape: `(output_length, batch_size, filters)`.
    output = batch_dot(x_aggregate, kernel)
    return permute_dimensions(output, (1, 0, 2))
output_shape: a tuple with (output_row, output_col) data_format: the data format, channels_first or channels_last # Returns A 4d tensor with shape: (batch_size, filters, new_rows, new_cols) if data_format='channels_first' or 4D tensor with shape: (batch_size, new_rows, new_cols, filters) if data_format='channels_last'. # Raises ValueError: if `data_format` is neither `channels_last` or `channels_first`. """ data_format = normalize_data_format(data_format) stride_row, stride_col = strides output_row, output_col = output_shape kernel_shape = int_shape(kernel) _, feature_dim, filters = kernel_shape xs = [] for i in range(output_row): for j in range(output_col): slice_row = py_slice(i * stride_row, i * stride_row + kernel_size[0]) slice_col = py_slice(j * stride_col, j * stride_col + kernel_size[1]) if data_format == 'channels_first': xs.append(reshape(inputs[:, :, slice_row, slice_col], (1, -1, feature_dim))) else: xs.append(reshape(inputs[:, slice_row, slice_col, :], (1, -1, feature_dim))) x_aggregate = concatenate(xs, axis=0) output = batch_dot(x_aggregate, kernel) output = reshape(output, (output_row, output_col, -1, filters)) if data_format == 'channels_first': output = permute_dimensions(output, (2, 3, 0, 1)) else: output = permute_dimensions(output, (2, 0, 1, 3)) return output Keras-2.2.4/keras/backend/common.py0000644000000000116100000001216413326715636016677 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np # the type of float to use throughout the session. _FLOATX = 'float32' _EPSILON = 1e-7 _IMAGE_DATA_FORMAT = 'channels_last' def epsilon(): """Returns the value of the fuzz factor used in numeric expressions. # Returns A float. # Example ```python >>> keras.backend.epsilon() 1e-07 ``` """ return _EPSILON def set_epsilon(e): """Sets the value of the fuzz factor used in numeric expressions. # Arguments e: float. New value of epsilon. 
def floatx():
    """Returns the default float type, as a string.
    (e.g. 'float16', 'float32', 'float64').

    # Returns
        String, the current default float type.

    # Example
    ```python
        >>> keras.backend.floatx()
        'float32'
    ```
    """
    return _FLOATX


def set_floatx(floatx):
    """Sets the default float type.

    # Arguments
        floatx: String, 'float16', 'float32', or 'float64'.

    # Raises
        ValueError: if `floatx` is not one of the supported type names.

    # Example
    ```python
        >>> from keras import backend as K
        >>> K.floatx()
        'float32'
        >>> K.set_floatx('float16')
        >>> K.floatx()
        'float16'
    ```
    """
    global _FLOATX
    if floatx not in {'float16', 'float32', 'float64'}:
        raise ValueError('Unknown floatx type: ' + str(floatx))
    _FLOATX = str(floatx)


def cast_to_floatx(x):
    """Cast a Numpy array to the default Keras float type.

    # Arguments
        x: Numpy array.

    # Returns
        The same Numpy array, cast to its new type.

    # Example
    ```python
        >>> from keras import backend as K
        >>> K.floatx()
        'float32'
        >>> arr = numpy.array([1.0, 2.0], dtype='float64')
        >>> arr.dtype
        dtype('float64')
        >>> new_arr = K.cast_to_floatx(arr)
        >>> new_arr
        array([ 1.,  2.], dtype=float32)
        >>> new_arr.dtype
        dtype('float32')
    ```
    """
    return np.asarray(x, dtype=_FLOATX)


def image_data_format():
    """Returns the default image data format convention.

    # Returns
        A string, either `'channels_first'` or `'channels_last'`

    # Example
    ```python
        >>> keras.backend.image_data_format()
        'channels_last'
    ```
    """
    return _IMAGE_DATA_FORMAT


def set_image_data_format(data_format):
    """Sets the value of the data format convention.

    # Arguments
        data_format: string. `'channels_first'` or `'channels_last'`.

    # Raises
        ValueError: if `data_format` is neither `'channels_first'`
            nor `'channels_last'`.

    # Example
    ```python
        >>> from keras import backend as K
        >>> K.image_data_format()
        'channels_last'
        >>> K.set_image_data_format('channels_first')
        >>> K.image_data_format()
        'channels_first'
    ```
    """
    global _IMAGE_DATA_FORMAT
    if data_format not in {'channels_last', 'channels_first'}:
        # Build one message string; passing the value as a second
        # argument would render the error as a tuple.
        raise ValueError('Unknown data_format: ' + str(data_format))
    _IMAGE_DATA_FORMAT = str(data_format)


def normalize_data_format(value):
    """Checks that the value correspond to a valid data format.

    `None` is replaced by the global default returned by
    `image_data_format()`; strings are matched case-insensitively.

    # Arguments
        value: String or None. `'channels_first'` or `'channels_last'`.

    # Returns
        A string, either `'channels_first'` or `'channels_last'`

    # Example
    ```python
        >>> from keras import backend as K
        >>> K.image_data_format()
        'channels_last'
        >>> K.normalize_data_format(None)
        'channels_last'
        >>> K.normalize_data_format('channels_first')
        'channels_first'
    ```

    # Raises
        ValueError: if `value` or the global `data_format` invalid.
    """
    if value is None:
        value = image_data_format()
    try:
        data_format = value.lower()
    except AttributeError:
        # Not string-like: fall through to the documented ValueError
        # instead of leaking an AttributeError to the caller.
        data_format = None
    if data_format not in ('channels_first', 'channels_last'):
        raise ValueError('The `data_format` argument must be one of '
                         '"channels_first", "channels_last". Received: ' +
                         str(value))
    return data_format


# Legacy methods

def set_image_dim_ordering(dim_ordering):
    """Legacy setter for `image_data_format`.

    `'th'` selects `'channels_first'`, `'tf'` selects `'channels_last'`.

    # Arguments
        dim_ordering: string. `tf` or `th`.

    # Example
    ```python
        >>> from keras import backend as K
        >>> K.set_image_dim_ordering('th')
        >>> K.image_data_format()
        'channels_first'
        >>> K.set_image_dim_ordering('tf')
        >>> K.image_data_format()
        'channels_last'
    ```

    # Raises
        ValueError: if `dim_ordering` is invalid.
    """
    global _IMAGE_DATA_FORMAT
    if dim_ordering not in {'tf', 'th'}:
        raise ValueError('Unknown dim_ordering: ' + str(dim_ordering))
    if dim_ordering == 'th':
        data_format = 'channels_first'
    else:
        data_format = 'channels_last'
    _IMAGE_DATA_FORMAT = data_format
# Returns string, one of `'th'`, `'tf'` """ if _IMAGE_DATA_FORMAT == 'channels_first': return 'th' else: return 'tf' Keras-2.2.4/keras/backend/cntk_backend.py0000644000000000116100000024241113354530144020003 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function import cntk as C import numpy as np from .common import floatx from .common import epsilon from .common import image_data_format from .common import normalize_data_format from ..utils.generic_utils import transpose_shape from collections import defaultdict from contextlib import contextmanager import warnings C.set_global_option('align_axis', 1) b_any = any py_slice = slice dev = C.device.use_default_device() if dev.type() == 0: warnings.warn( 'CNTK backend warning: GPU is not detected. ' 'CNTK\'s CPU version is not fully optimized,' 'please run with GPU to get better performance.') # A learning phase is a bool tensor used to run Keras models in # either train mode (learning_phase == 1) or test mode (learning_phase == 0). # LEARNING_PHASE_PLACEHOLDER is the placeholder for dynamic learning phase _LEARNING_PHASE_PLACEHOLDER = C.constant(shape=(), dtype=np.float32, value=1.0, name='_keras_learning_phase') # static learning phase flag, if it is not 0 or 1, we will go with dynamic learning phase tensor. 
_LEARNING_PHASE = -1
_UID_PREFIXES = defaultdict(int)

# cntk doesn't support gradient as symbolic op, to hook up with keras model,
# we will create gradient as a constant placeholder, here use this global
# map to keep the mapping from grad placeholder to parameter
grad_parameter_dict = {}

NAME_SCOPE_STACK = []


@contextmanager
def name_scope(name):
    """Context manager that pushes `name` onto `NAME_SCOPE_STACK`.

    The name is popped again when the `with` block exits, including when
    the block raises, so a failure cannot leave a stale scope behind.

    # Arguments
        name: scope name to push for the duration of the block.
    """
    NAME_SCOPE_STACK.append(name)
    try:
        yield
    finally:
        NAME_SCOPE_STACK.pop()


def get_uid(prefix=''):
    """Returns the next integer id for `prefix`.

    Each prefix has its own counter; the first id handed out is 1.

    # Arguments
        prefix: string key selecting the counter.

    # Returns
        An integer.
    """
    _UID_PREFIXES[prefix] += 1
    return _UID_PREFIXES[prefix]


def learning_phase():
    """Returns the learning phase flag.

    # Returns
        The static flag (0 or 1) when one has been set, otherwise the
        dynamic learning phase placeholder.
    """
    # If _LEARNING_PHASE is not 0 or 1, return dynamic learning phase tensor
    return _LEARNING_PHASE if _LEARNING_PHASE in {0, 1} else _LEARNING_PHASE_PLACEHOLDER


def set_learning_phase(value):
    """Sets the static learning phase flag.

    # Arguments
        value: 0 or 1.

    # Raises
        ValueError: if `value` is neither 0 nor 1.
    """
    global _LEARNING_PHASE
    if value not in {0, 1}:
        raise ValueError('CNTK Backend: Set learning phase '
                         'with value %s is not supported, '
                         'expected 0 or 1.' % value)
    _LEARNING_PHASE = value


def clear_session():
    """Reset learning phase flag for cntk backend.
    """
    global _LEARNING_PHASE
    _LEARNING_PHASE = -1
    _LEARNING_PHASE_PLACEHOLDER.value = np.asarray(1.0)


def in_train_phase(x, alt, training=None):
    """Selects `x` in train phase, and `alt` otherwise.

    # Arguments
        x: what to return in train phase. A callable that is not a CNTK
            `Function` is called first and its result is used.
        alt: what to return otherwise; callables are handled like `x`.
        training: optional phase flag. When `None`, `learning_phase()`
            is used.

    # Returns
        `x` or `alt` for a static (integer/boolean) phase, otherwise a
        `C.element_select` of the two. The result is tagged with
        `_uses_learning_phase`, which is `True` only when `training`
        was not given.
    """
    if training is None:
        training = learning_phase()
        uses_learning_phase = True
    else:
        uses_learning_phase = False

    # CNTK currently don't support cond op, so here we use
    # element_select approach as workaround. It may have
    # perf issue, will resolve it later with cntk cond op.
    if callable(x) and not isinstance(x, C.cntk_py.Function):
        x = x()
    if callable(alt) and not isinstance(alt, C.cntk_py.Function):
        alt = alt()

    if training is True:
        x._uses_learning_phase = uses_learning_phase
        return x

    # Static phase: `bool` is a subclass of `int` and `True == 1`, so one
    # isinstance check and one comparison cover both kinds of flag.
    if isinstance(training, int):
        result = x if training == 1 else alt
    else:
        result = C.element_select(training, x, alt)
    result._uses_learning_phase = uses_learning_phase
    return result


def in_test_phase(x, alt, training=None):
    """Selects `x` in test phase, and `alt` otherwise.

    Implemented as `in_train_phase` with the two branches swapped.
    """
    return in_train_phase(alt, x, training=training)


def _convert_string_dtype(dtype):
    """Maps a dtype name to the matching Numpy float type.

    Any value other than 'float32', 'float64' or 'float16' maps to
    `np.float32`.
    """
    if dtype == 'float32':
        return np.float32
    elif dtype == 'float64':
        return np.float64
    elif dtype == 'float16':
        return np.float16
    else:
        # cntk only running with float,
        # try to cast to float to run the model
        return np.float32


def _convert_dtype_string(dtype):
    """Maps a Numpy float type to its dtype name.

    # Raises
        ValueError: if `dtype` is not float32, float64 or float16.
    """
    if dtype == np.float32:
        return 'float32'
    elif dtype == np.float64:
        return 'float64'
    elif dtype == np.float16:
        return 'float16'
    else:
        raise ValueError('CNTK Backend: Unsupported dtype: %s. '
                         'CNTK only supports float32, float64, and '
                         'float16.' % dtype)
""" if dtype is None: dtype = floatx() if name is None: name = '' if isinstance( value, C.variables.Constant) or isinstance( value, C.variables.Parameter): value = value.value # we don't support init parameter with symbolic op, so eval it first as # workaround if isinstance(value, C.cntk_py.Function): value = eval(value) shape = value.shape if hasattr(value, 'shape') else () if hasattr(value, 'dtype') and value.dtype != dtype and len(shape) > 0: value = value.astype(dtype) # TODO: remove the conversion when cntk supports int32, int64 # https://docs.microsoft.com/en-us/python/api/cntk.variables.parameter dtype = 'float32' if 'int' in str(dtype) else dtype v = C.parameter(shape=shape, init=value, dtype=dtype, name=_prepare_name(name, 'variable')) v._keras_shape = v.shape v._uses_learning_phase = False v.constraint = constraint return v def bias_add(x, bias, data_format=None): data_format = normalize_data_format(data_format) dims = len(x.shape) if dims > 0 and x.shape[0] == C.InferredDimension: dims -= 1 bias_dims = len(bias.shape) if bias_dims != 1 and bias_dims != dims: raise ValueError('Unexpected bias dimensions %d, ' 'expected 1 or %d dimensions' % (bias_dims, dims)) if dims == 4: if data_format == 'channels_first': if bias_dims == 1: shape = (bias.shape[0], 1, 1, 1) else: shape = (bias.shape[3],) + bias.shape[:3] elif data_format == 'channels_last': if bias_dims == 1: shape = (1, 1, 1, bias.shape[0]) else: shape = bias.shape elif dims == 3: if data_format == 'channels_first': if bias_dims == 1: shape = (bias.shape[0], 1, 1) else: shape = (bias.shape[2],) + bias.shape[:2] elif data_format == 'channels_last': if bias_dims == 1: shape = (1, 1, bias.shape[0]) else: shape = bias.shape elif dims == 2: if data_format == 'channels_first': if bias_dims == 1: shape = (bias.shape[0], 1) else: shape = (bias.shape[1],) + bias.shape[:1] elif data_format == 'channels_last': if bias_dims == 1: shape = (1, bias.shape[0]) else: shape = bias.shape else: shape = bias.shape return x 
+ reshape(bias, shape) def eval(x): if isinstance(x, C.cntk_py.Function): return x.eval() elif isinstance(x, C.variables.Constant) or isinstance(x, C.variables.Parameter): return x.value else: raise ValueError('CNTK Backend: `eval` method on ' '`%s` type is not supported. ' 'CNTK only supports `eval` with ' '`Function`, `Constant` or ' '`Parameter`.' % type(x)) def placeholder( shape=None, ndim=None, dtype=None, sparse=False, name=None, dynamic_axis_num=1): if dtype is None: dtype = floatx() if not shape: if ndim: shape = tuple([None for _ in range(ndim)]) dynamic_dimension = C.FreeDimension if _get_cntk_version() >= 2.2 else C.InferredDimension cntk_shape = [dynamic_dimension if s is None else s for s in shape] cntk_shape = tuple(cntk_shape) if dynamic_axis_num > len(cntk_shape): raise ValueError('CNTK backend: creating placeholder with ' '%d dimension is not supported, at least ' '%d dimensions are needed.' % (len(cntk_shape), dynamic_axis_num)) if name is None: name = '' cntk_shape = cntk_shape[dynamic_axis_num:] x = C.input( shape=cntk_shape, dtype=_convert_string_dtype(dtype), is_sparse=sparse, name=name) x._keras_shape = shape x._uses_learning_phase = False x._cntk_placeholder = True return x def is_placeholder(x): """Returns whether `x` is a placeholder. # Arguments x: A candidate placeholder. # Returns Boolean. """ return hasattr(x, '_cntk_placeholder') and x._cntk_placeholder def is_keras_tensor(x): if not is_tensor(x): raise ValueError('Unexpectedly found an instance of type `' + str(type(x)) + '`. 
' 'Expected a symbolic tensor instance.') return hasattr(x, '_keras_history') def is_tensor(x): return isinstance(x, (C.variables.Constant, C.variables.Variable, C.variables.Parameter, C.ops.functions.Function)) def shape(x): shape = list(int_shape(x)) num_dynamic = _get_dynamic_axis_num(x) non_dyn_shape = [] for i in range(len(x.shape)): if shape[i + num_dynamic] is None: non_dyn_shape.append(x.shape[i]) else: non_dyn_shape.append(shape[i + num_dynamic]) return shape[:num_dynamic] + non_dyn_shape def is_sparse(tensor): return tensor.is_sparse def int_shape(x): if hasattr(x, '_keras_shape'): return x._keras_shape shape = x.shape if hasattr(x, 'dynamic_axes'): dynamic_shape = [None for a in x.dynamic_axes] shape = tuple(dynamic_shape) + shape return shape def ndim(x): shape = int_shape(x) return len(shape) def _prepare_name(name, default): prefix = '_'.join(NAME_SCOPE_STACK) if name is None or name == '': return prefix + '/' + default return prefix + '/' + name def constant(value, dtype=None, shape=None, name=None): if dtype is None: dtype = floatx() if shape is None: shape = () np_value = value * np.ones(shape) const = C.constant(np_value, dtype=dtype, name=_prepare_name(name, 'constant')) const._keras_shape = const.shape const._uses_learning_phase = False return const def random_binomial(shape, p=0.0, dtype=None, seed=None): if seed is None: # ensure that randomness is conditioned by the Numpy RNG seed = np.random.randint(10e7) if dtype is None: dtype = np.float32 else: dtype = _convert_string_dtype(dtype) for _ in shape: if _ is None: raise ValueError('CNTK Backend: randomness op with ' 'dynamic shape is not supported now. ' 'Please provide fixed dimension ' 'instead of `None`.') return C.random.bernoulli(shape=shape, dtype=dtype, mean=p, seed=seed) def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None): for _ in shape: if _ is None: raise ValueError('CNTK Backend: randomness op with ' 'dynamic shape is not supported now. 
' 'Please provide fixed dimension ' 'instead of `None`.') if seed is None: # ensure that randomness is conditioned by the Numpy RNG seed = np.random.randint(10e3) return C.random.uniform(shape=shape, dtype=dtype, low=minval, high=maxval, seed=seed) def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None): if dtype is None: dtype = floatx() if seed is None: # ensure that randomness is conditioned by the Numpy RNG seed = np.random.randint(10e3) if dtype is None: dtype = np.float32 else: dtype = _convert_string_dtype(dtype) if name is None: name = '' scale = (high - low) / 2 p = C.parameter( shape, init=C.initializer.uniform( scale, seed=seed), dtype=dtype, name=name) return variable(value=p.value + low + scale) def random_normal_variable( shape, mean, scale, dtype=None, name=None, seed=None): if dtype is None: dtype = floatx() if seed is None: # ensure that randomness is conditioned by the Numpy RNG seed = np.random.randint(10e7) if dtype is None: dtype = np.float32 else: dtype = _convert_string_dtype(dtype) if name is None: name = '' p = C.parameter( shape=shape, init=C.initializer.normal( scale=scale, seed=seed), dtype=dtype, name=name) return variable(value=p.value + mean) def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): if dtype is None: dtype = floatx() for _ in shape: if _ is None: raise ValueError('CNTK Backend: randomness op with ' 'dynamic shape is not supported now. 
' 'Please provide fixed dimension ' 'instead of `None`.') if seed is None: # ensure that randomness is conditioned by the Numpy RNG seed = np.random.randint(10e3) return C.random.normal(shape=shape, mean=mean, scale=stddev, seed=seed, dtype=dtype) def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): if seed is None: seed = np.random.randint(1, 10e6) if dtype is None: dtype = np.float32 else: dtype = _convert_string_dtype(dtype) return C.parameter( shape, init=C.initializer.truncated_normal( stddev, seed=seed), dtype=dtype) def dtype(x): return _convert_dtype_string(x.dtype) def zeros(shape, dtype=None, name=None): if dtype is None: dtype = floatx() ctype = _convert_string_dtype(dtype) return variable(value=np.zeros(shape, ctype), dtype=dtype, name=name) def ones(shape, dtype=None, name=None): if dtype is None: dtype = floatx() ctype = _convert_string_dtype(dtype) return variable(value=np.ones(shape, ctype), dtype=dtype, name=name) def eye(size, dtype=None, name=None): if dtype is None: dtype = floatx() return variable(np.eye(size), dtype, name) def zeros_like(x, dtype=None, name=None): return x * 0 def ones_like(x, dtype=None, name=None): return zeros_like(x) + 1 def count_params(x): for _ in x.shape: if _ == C.InferredDimension or _ == C.FreeDimension: raise ValueError('CNTK backend: `count_params` with dynamic ' 'shape is not supported. 
Please provide ' 'fixed dimension instead of `None`.') return np.prod(int_shape(x)) def cast(x, dtype): # cntk calculate everything in float, so don't need case from bool / int return x def dot(x, y): if len(x.shape) > 2 or len(y.shape) > 2: y_shape = int_shape(y) if len(y_shape) > 2: permutation = [len(y_shape) - 2] permutation += list(range(len(y_shape) - 2)) permutation += [len(y_shape) - 1] y = C.transpose(y, perm=permutation) return C.times(x, y, len(y_shape) - 1) else: return C.times(x, y) def batch_dot(x, y, axes=None): x_shape = int_shape(x) y_shape = int_shape(y) if isinstance(axes, int): axes = (axes, axes) if axes is None: # behaves like tf.batch_matmul as default axes = [len(x_shape) - 1, len(y_shape) - 2] if b_any([isinstance(a, (list, tuple)) for a in axes]): raise ValueError('Multiple target dimensions are not supported. ' + 'Expected: None, int, (int, int), ' + 'Provided: ' + str(axes)) if len(x_shape) == 2 and len(y_shape) == 2: if axes[0] == axes[1]: result = sum(x * y, axis=axes[0], keepdims=True) return result if axes[0] == 1 else transpose(result) else: return sum(x * transpose(y), axis=axes[0], keepdims=True) else: if len(y_shape) == 2: y = expand_dims(y) normalized_axis = [] normalized_axis.append(_normalize_axis(axes[0], x)[0]) normalized_axis.append(_normalize_axis(axes[1], y)[0]) # transpose i = normalized_axis[0] while i < len(x.shape) - 1: x = C.swapaxes(x, i, i + 1) i += 1 i = normalized_axis[1] while i > 0: y = C.swapaxes(y, i, i - 1) i -= 1 result = C.times(x, y, output_rank=(len(y.shape) - 1) if len(y.shape) > 1 else 1) if len(y_shape) == 2: result = squeeze(result, -1) return result def transpose(x): return C.swapaxes(x, 0, 1) def gather(reference, indices): # There is a bug in cntk gather op which may cause crash. # We have made a fix but not catched in CNTK 2.1 release. 
# Will update with gather op in next release if _get_cntk_version() >= 2.2: return C.ops.gather(reference, indices) else: num_classes = reference.shape[0] one_hot_matrix = C.ops.one_hot(indices, num_classes) return C.times(one_hot_matrix, reference, output_rank=len(reference.shape) - 1) def _remove_dims(x, axis, keepdims=False): if keepdims is False and isinstance(axis, list): # sequence axis is removed by default, so don't need reshape on it reduce_axes = [] for a in axis: if isinstance(a, C.Axis) is False: reduce_axes.append(a) return _reshape_dummy_dim(x, reduce_axes) else: if isinstance(axis, list): has_seq = False for a in axis: if isinstance(a, C.Axis): has_seq = True break if has_seq: nones = _get_dynamic_axis_num(x) x = expand_dims(x, nones) return x def max(x, axis=None, keepdims=False): axis = _normalize_axis(axis, x) output = _reduce_on_axis(x, axis, 'reduce_max') return _remove_dims(output, axis, keepdims) def min(x, axis=None, keepdims=False): axis = _normalize_axis(axis, x) output = _reduce_on_axis(x, axis, 'reduce_min') return _remove_dims(output, axis, keepdims) def sum(x, axis=None, keepdims=False): axis = _normalize_axis(axis, x) output = _reduce_on_axis(x, axis, 'reduce_sum') return _remove_dims(output, axis, keepdims) def prod(x, axis=None, keepdims=False): axis = _normalize_axis(axis, x) output = _reduce_on_axis(x, axis, 'reduce_prod') return _remove_dims(output, axis, keepdims) def logsumexp(x, axis=None, keepdims=False): return log(sum(exp(x), axis=axis, keepdims=keepdims)) def var(x, axis=None, keepdims=False): m = mean(x, axis, keepdims=True) devs_squared = C.square(x - m) return mean(devs_squared, axis=axis, keepdims=keepdims) def std(x, axis=None, keepdims=False): return C.sqrt(var(x, axis=axis, keepdims=keepdims)) def expand_dims(x, axis=-1): shape = list(int_shape(x)) nones = _get_dynamic_axis_num(x) index = axis if axis >= 0 else len(shape) + 1 shape.insert(index, 1) new_shape = shape[nones:] new_shape = tuple( [C.InferredDimension if 
_ is None else _ for _ in new_shape]) result = C.reshape(x, new_shape) if index < nones: result._keras_shape = shape return result def squeeze(x, axis): if isinstance(axis, tuple): axis = list(axis) if not isinstance(axis, list): axis = [axis] shape = list(int_shape(x)) _axis = [] for _ in axis: if isinstance(_, int): _axis.append(_ if _ >= 0 else _ + len(shape)) if len(_axis) == 0: return x nones = _get_dynamic_axis_num(x) for _ in sorted(_axis, reverse=True): del shape[_] new_shape = shape[nones:] new_shape = tuple([C.InferredDimension if _ == C.FreeDimension else _ for _ in new_shape]) return C.reshape(x, new_shape) def tile(x, n): if isinstance(n, int): n = (n,) elif isinstance(n, list): n = tuple(n) shape = int_shape(x) num_dynamic_axis = _get_dynamic_axis_num(x) # Padding the axis if len(n) < len(shape): n = tuple([1 for _ in range(len(shape) - len(n))]) + n if len(n) != len(shape): raise NotImplementedError i = num_dynamic_axis for i, rep in enumerate(n): if i >= num_dynamic_axis and shape[i] is not None: tmp = [x] * rep x = C.splice(*tmp, axis=i - num_dynamic_axis) i += 1 return x def _normalize_axis(axis, x): shape = int_shape(x) ndim = len(shape) nones = _get_dynamic_axis_num(x) if nones > ndim: raise ValueError('CNTK Backend: tensor with keras shape: `%s` has ' '%d cntk dynamic axis, this is not expected, please ' 'double check the keras shape history.' % (str(shape), nones)) # Current cntk does not support shape like (1, batch). so using the workaround # here to mapping the correct axis. 
Will remove this tricky after we add support # in native cntk op cntk_axis = [] dynamic_axis_index = 0 for i in range(ndim): if shape[i] is None and dynamic_axis_index < nones: cntk_axis.append(x.dynamic_axes[dynamic_axis_index]) dynamic_axis_index += 1 else: cntk_axis.append(i - dynamic_axis_index) if dynamic_axis_index < nones: i = 0 while dynamic_axis_index < nones: cntk_axis[i] = x.dynamic_axes[dynamic_axis_index] i += 1 dynamic_axis_index += 1 while i < len(cntk_axis): cntk_axis[i] -= nones i += 1 if isinstance(axis, tuple): _axis = list(axis) elif isinstance(axis, int): _axis = [axis] elif isinstance(axis, list): _axis = list(axis) else: _axis = axis if isinstance(_axis, list): for i, a in enumerate(_axis): if a is not None and a < 0: _axis[i] = (a % ndim) if _axis[i] is not None: _axis[i] = cntk_axis[_axis[i]] else: if _axis is None: _axis = C.Axis.all_axes() return _axis def _reshape_dummy_dim(x, axis): shape = list(x.shape) _axis = [_ + len(shape) if _ < 0 else _ for _ in axis] if shape.count(C.InferredDimension) > 1 or shape.count(C.FreeDimension) > 1: result = x for index in sorted(_axis, reverse=True): result = C.reshape(result, shape=(), begin_axis=index, end_axis=index + 1) return result else: for index in sorted(_axis, reverse=True): del shape[index] shape = [C.InferredDimension if _ == C.FreeDimension else _ for _ in shape] return C.reshape(x, shape) def mean(x, axis=None, keepdims=False): axis = _normalize_axis(axis, x) output = _reduce_on_axis(x, axis, 'reduce_mean') return _remove_dims(output, axis, keepdims) def any(x, axis=None, keepdims=False): reduce_result = sum(x, axis, keepdims=keepdims) any_matrix = C.element_select( reduce_result, ones_like(reduce_result), zeros_like(reduce_result)) if len(reduce_result.shape) == 0 and _get_dynamic_axis_num(x) == 0: return C.reduce_sum(any_matrix) else: return any_matrix def all(x, axis=None, keepdims=False): reduce_result = prod(x, axis, keepdims=keepdims) all_matrix = C.element_select( reduce_result, 
ones_like(reduce_result), zeros_like(reduce_result)) if len(reduce_result.shape) == 0 and _get_dynamic_axis_num(x) == 0: return C.reduce_sum(all_matrix) else: return all_matrix def classification_error(target, output, axis=-1): return C.ops.reduce_mean( C.equal( argmax( output, axis=-1), argmax( target, axis=-1)), axis=C.Axis.all_axes()) def argmax(x, axis=-1): axis = [axis] axis = _normalize_axis(axis, x) output = C.ops.argmax(x, axis=axis[0]) return _reshape_dummy_dim(output, axis) def argmin(x, axis=-1): axis = [axis] axis = _normalize_axis(axis, x) output = C.ops.argmin(x, axis=axis[0]) return _reshape_dummy_dim(output, axis) def square(x): return C.square(x) def abs(x): return C.abs(x) def sqrt(x): return C.sqrt(x) def exp(x): return C.exp(x) def log(x): return C.log(x) def round(x): return C.round(x) def sigmoid(x): return C.sigmoid(x) def sign(x): return x / C.abs(x) def pow(x, a): return C.pow(x, a) def clip(x, min_value, max_value): if max_value is not None and max_value < min_value: max_value = min_value if max_value is None: max_value = np.inf if min_value is None: min_value = -np.inf return C.clip(x, min_value, max_value) def binary_crossentropy(target, output, from_logits=False): if from_logits: output = C.sigmoid(output) output = C.clip(output, epsilon(), 1.0 - epsilon()) output = -target * C.log(output) - (1.0 - target) * C.log(1.0 - output) return output def get_variable_shape(x): return int_shape(x) def update(x, new_x): return C.assign(x, new_x) def moving_average_update(variable, value, momentum): return C.assign(variable, variable * momentum + value * (1. - momentum)) def update_add(x, increment): result = x + increment return C.assign(x, result) def gradients(loss, variables): # cntk does not support gradients as symbolic op, # to hook up with keras model # we will return a constant as place holder, the cntk learner will apply # the gradient during training. 
global grad_parameter_dict if isinstance(variables, list) is False: variables = [variables] grads = [] for v in variables: g = C.constant(0, shape=v.shape, name='keras_grad_placeholder') grads.append(g) grad_parameter_dict[g] = v return grads def equal(x, y): return C.equal(x, y) def not_equal(x, y): return C.not_equal(x, y) def greater(x, y): return C.greater(x, y) def greater_equal(x, y): return C.greater_equal(x, y) def less(x, y): return C.less(x, y) def less_equal(x, y): return C.less_equal(x, y) def maximum(x, y): return C.element_max(x, y) def minimum(x, y): return C.element_min(x, y) def sin(x): return C.sin(x) def cos(x): return C.cos(x) def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3): if gamma is None: if beta is None: gamma = ones_like(x) else: gamma = ones_like(beta) if beta is None: if gamma is None: beta = zeros_like(x) else: beta = zeros_like(gamma) mean, variant = _moments(x, _normalize_axis(reduction_axes, x)) if sorted(reduction_axes) == list(range(ndim(x)))[:-1]: normalized = batch_normalization( x, mean, variant, beta, gamma, epsilon) else: # need broadcasting target_shape = [] x_shape = int_shape(x) # skip the batch axis for axis in range(1, ndim(x)): if axis in reduction_axes: target_shape.append(1) if ndim(gamma) > axis: gamma = C.reduce_mean(gamma, axis - 1) beta = C.reduce_mean(beta, axis - 1) else: target_shape.append(x_shape[axis]) broadcast_mean = C.reshape(mean, target_shape) broadcast_var = C.reshape(variant, target_shape) broadcast_gamma = C.reshape(gamma, target_shape) broadcast_beta = C.reshape(beta, target_shape) normalized = batch_normalization( x, broadcast_mean, broadcast_var, broadcast_beta, broadcast_gamma, epsilon) return normalized, mean, variant def _moments(x, axes=None, shift=None, keep_dims=False): _axes = tuple(axes) if shift is None: shift = x # Compute true mean while keeping the dims for proper broadcasting. 
for axis in _axes: shift = C.reduce_mean(shift, axis=axis) shift = C.stop_gradient(shift) shifted_mean = C.minus(x, shift) for axis in _axes: shifted_mean = C.reduce_mean(shifted_mean, axis=axis) variance_mean = C.square(C.minus(x, shift)) for axis in _axes: variance_mean = C.reduce_mean(variance_mean, axis=axis) variance = C.minus(variance_mean, C.square(shifted_mean)) mean = C.plus(shifted_mean, shift) if not keep_dims: mean = squeeze(mean, _axes) variance = squeeze(variance, _axes) return mean, variance def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3): # The mean / var / beta / gamma may be processed by broadcast # so it may have an extra batch axis with 1, it is not needed # in cntk, need to remove those dummy axis. if ndim(mean) == ndim(x) and shape(mean)[0] == 1: mean = _reshape_dummy_dim(mean, [0]) if ndim(var) == ndim(x) and shape(var)[0] == 1: var = _reshape_dummy_dim(var, [0]) if gamma is None: gamma = ones_like(var) elif ndim(gamma) == ndim(x) and shape(gamma)[0] == 1: gamma = _reshape_dummy_dim(gamma, [0]) if beta is None: beta = zeros_like(mean) elif ndim(beta) == ndim(x) and shape(beta)[0] == 1: beta = _reshape_dummy_dim(beta, [0]) return (x - mean) / C.sqrt(var + epsilon) * gamma + beta def concatenate(tensors, axis=-1): if len(tensors) == 0: return None axis = [axis] axis = _normalize_axis(axis, tensors[0]) return C.splice(*tensors, axis=axis[0]) def flatten(x): return reshape(x, (-1,)) def reshape(x, shape): shape = tuple([C.InferredDimension if _ == C.FreeDimension else _ for _ in shape]) if isinstance(x, C.variables.Parameter): return C.reshape(x, shape) else: num_dynamic_axis = _get_dynamic_axis_num(x) if num_dynamic_axis == 1 and len(shape) > 0 and shape[0] == -1: # collapse axis with batch axis if b_any(_ == C.InferredDimension for _ in x.shape) or b_any( _ == C.FreeDimension for _ in x.shape): warnings.warn( 'Warning: CNTK backend does not support ' 'collapse of batch axis with inferred dimension. 
' 'The reshape did not take place.') return x return _reshape_batch(x, shape) else: # no collapse, then first need to padding the shape if num_dynamic_axis >= len(shape): i = 0 while i < len(shape): if shape[i] is None or shape[i] == -1: i += 1 else: break shape = tuple([-1 for _ in range(num_dynamic_axis - i)]) + shape new_shape = list(shape) new_shape = new_shape[num_dynamic_axis:] new_shape = [C.InferredDimension if _ is None else _ for _ in new_shape] return C.reshape(x, new_shape) def permute_dimensions(x, pattern): dims = len(int_shape(x)) num_dynamic_axis = _get_dynamic_axis_num(x) if isinstance(pattern, list): current_layout = [i for i in range(dims)] else: current_layout = tuple([i for i in range(dims)]) if num_dynamic_axis > 0 and pattern[:num_dynamic_axis] != current_layout[:num_dynamic_axis]: raise ValueError('CNTK backend: the permute pattern %s ' 'requested permute on dynamic axis, ' 'which is not supported. Please do permute ' 'on static axis.' % pattern) axis = list(pattern) axis = axis[num_dynamic_axis:] axis = _normalize_axis(axis, x) return C.transpose(x, axis) def resize_images(x, height_factor, width_factor, data_format, interpolation='nearest'): if interpolation == 'nearest': if data_format == 'channels_first': output = repeat_elements(x, height_factor, axis=2) output = repeat_elements(output, width_factor, axis=3) return output elif data_format == 'channels_last': output = repeat_elements(x, height_factor, axis=1) output = repeat_elements(output, width_factor, axis=2) return output else: raise ValueError('CNTK Backend: Invalid data_format: %s' % data_format) else: raise NotImplementedError('CNTK only supports `nearest` interpolation.') def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): if data_format == 'channels_first': output = repeat_elements(x, depth_factor, axis=2) output = repeat_elements(output, height_factor, axis=3) output = repeat_elements(output, width_factor, axis=4) return output elif data_format == 
'channels_last': output = repeat_elements(x, depth_factor, axis=1) output = repeat_elements(output, height_factor, axis=2) output = repeat_elements(output, width_factor, axis=3) return output else: raise ValueError('CNTK Backend: Invalid data_format: %s' % data_format) def repeat_elements(x, rep, axis): axis = _normalize_axis(axis, x) axis = axis[0] slices = [] shape = x.shape i = 0 while i < shape[axis]: tmp = C.ops.slice(x, axis, i, i + 1) for _ in range(rep): slices.append(tmp) i += 1 return C.splice(*slices, axis=axis) def repeat(x, n): # this is a workaround for recurrent layer # if n is inferred dimension, # we can't figure out how to repeat it in cntk now # return the same x to take cntk broadcast feature # to make the recurrent layer work. # need to be fixed in GA. if n is C.InferredDimension or n is C.FreeDimension: return x index = 1 - _get_dynamic_axis_num(x) if index < 0 or index > 1: raise NotImplementedError new_shape = list(x.shape) new_shape.insert(index, 1) new_shape = tuple(new_shape) x = C.reshape(x, new_shape) temp = [x] * n return C.splice(*temp, axis=index) def tanh(x): return C.tanh(x) def _static_rnn(step_function, inputs, initial_states, go_backwards=False, mask=None, constants=None, unroll=False, input_length=None): shape = int_shape(inputs) dims = len(shape) uses_learning_phase = False if dims < 3: raise ValueError('Input should be at least 3D.') # if the second axis is static axis, CNTK will do unroll by default if shape[1] is None: raise ValueError('CNTK Backend: the input of static rnn ' 'has shape `%s`, the second axis ' 'is not static. If you want to run ' 'rnn with non-static axis, please try ' 'dynamic rnn with sequence axis.' 
% shape) if constants is None: constants = [] if mask is not None: mask_shape = int_shape(mask) if len(mask_shape) == dims - 1: mask = expand_dims(mask) nones = _get_dynamic_axis_num(inputs) states = tuple(initial_states) outputs = [] time_axis = 1 - nones if nones > 0 else 1 if go_backwards: i = shape[1] - 1 while i >= 0: current = C.ops.slice(inputs, time_axis, i, i + 1) # remove dummy dimension current = squeeze(current, time_axis) output, new_states = step_function( current, tuple(states) + tuple(constants)) if getattr(output, '_uses_learning_phase', False): uses_learning_phase = True if mask is not None: mask_slice = C.ops.slice(mask, time_axis, i, i + 1) mask_slice = squeeze(mask_slice, time_axis) if len(outputs) == 0: prev_output = zeros_like(output) else: prev_output = outputs[-1] output = C.ops.element_select(mask_slice, output, prev_output) return_states = [] for s, n_s in zip(states, new_states): return_states.append( C.ops.element_select( mask_slice, n_s, s)) new_states = return_states outputs.append(output) states = new_states i -= 1 else: i = 0 while i < shape[1]: current = C.ops.slice(inputs, time_axis, i, i + 1) # remove dummy dimension current = squeeze(current, 1) output, new_states = step_function( current, tuple(states) + tuple(constants)) if getattr(output, '_uses_learning_phase', False): uses_learning_phase = True if mask is not None: mask_slice = C.ops.slice(mask, time_axis, i, i + 1) mask_slice = squeeze(mask_slice, 1) if len(outputs) == 0: prev_output = zeros_like(output) else: prev_output = outputs[-1] output = C.ops.element_select(mask_slice, output, prev_output) return_states = [] for s, n_s in zip(states, new_states): return_states.append( C.ops.element_select( mask_slice, n_s, s)) new_states = return_states outputs.append(output) states = new_states[:len(states)] i += 1 i = 1 # add the time_step axis back final_output = expand_dims(outputs[0], 1) last_output = outputs[0] while i < len(outputs): # add the time_step axis back 
output_slice = expand_dims(outputs[i], 1) final_output = C.splice(final_output, output_slice, axis=time_axis) last_output = outputs[i] i += 1 last_output._uses_learning_phase = uses_learning_phase return last_output, final_output, states def rnn(step_function, inputs, initial_states, go_backwards=False, mask=None, constants=None, unroll=False, input_length=None): shape = int_shape(inputs) dims = len(shape) global uses_learning_phase uses_learning_phase = False if dims < 3: raise ValueError('CNTK Backend: the input of rnn has only rank %d ' 'Need at least rank 3 to run RNN.' % dims) if _get_dynamic_axis_num(inputs) == 0 or unroll: return _static_rnn( step_function, inputs, initial_states, go_backwards, mask, constants, unroll, input_length) if constants is None: constants = [] num_time_step = shape[1] if num_time_step is None and not has_seq_axis(inputs): num_time_step = inputs.shape[0] initial = [] for s in initial_states: if _get_dynamic_axis_num(s) == 0: if hasattr(C, 'to_batch'): initial.append(C.to_batch(s)) else: initial.append(C.user_function(ConvertToBatch(s))) else: initial.append(s) need_convert = not has_seq_axis(inputs) if go_backwards and need_convert is False: raise NotImplementedError('CNTK Backend: `go_backwards` is not supported with ' 'variable-length sequences. 
Please specify a ' 'static length for your sequences.') rnn_inputs = inputs if need_convert: if go_backwards: rnn_inputs = reverse(rnn_inputs, 1) rnn_inputs = C.to_sequence(rnn_inputs) rnn_constants = [] for constant in constants: if isinstance(constant, list): new_c = [] for c in constant: if _get_dynamic_axis_num(c) == 1: new_c.append(C.sequence.broadcast_as(c, rnn_inputs)) else: new_c.append(c) rnn_constants.append(new_c) else: if _get_dynamic_axis_num(constant) == 1: rnn_constants.append(C.sequence.broadcast_as(constant, rnn_inputs)) else: rnn_constants.append(constant) else: rnn_constants = constants if mask is not None and not has_seq_axis(mask): if go_backwards: mask = reverse(mask, 1) if len(int_shape(mask)) == 2: mask = expand_dims(mask) mask = C.to_sequence_like(mask, rnn_inputs) states = tuple(initial) with C.default_options(axis_offset=1): def _recurrence(x, states, m): # create place holder place_holders = [C.placeholder(dynamic_axes=x.dynamic_axes) for _ in states] past_values = [] for s, p in zip(states, place_holders): past_values.append(C.sequence.past_value(p, s)) new_output, new_states = step_function( x, tuple(past_values) + tuple(rnn_constants)) if getattr(new_output, '_uses_learning_phase', False): global uses_learning_phase uses_learning_phase = True if m is not None: new_states = [C.element_select(m, n, s) for n, s in zip(new_states, past_values)] n_s = [] for o, p in zip(new_states, place_holders): n_s.append(o.replace_placeholders({p: o.output})) if len(n_s) > 0: new_output = n_s[-1] return new_output, n_s final_output, final_states = _recurrence(rnn_inputs, states, mask) last_output = C.sequence.last(final_output) last_states = [C.sequence.last(s) for s in final_states] if need_convert: final_output = C.sequence.unpack(final_output, 0, no_mask_output=True) if num_time_step is not None and num_time_step is not C.FreeDimension: final_output = _reshape_sequence(final_output, num_time_step) f_stats = [] for l_s, i_s in zip(last_states, 
initial_states): if _get_dynamic_axis_num(i_s) == 0 and _get_dynamic_axis_num(l_s) == 1: if hasattr(C, 'unpack_batch'): f_stats.append(C.unpack_batch(l_s)) else: f_stats.append(C.user_function(ConvertToStatic(l_s, batch_size=i_s.shape[0]))) else: f_stats.append(l_s) last_output._uses_learning_phase = uses_learning_phase return last_output, final_output, f_stats def has_seq_axis(x): return hasattr(x, 'dynamic_axes') and len(x.dynamic_axes) > 1 def l2_normalize(x, axis=None): axis = [axis] axis = _normalize_axis(axis, x) norm = C.sqrt(C.reduce_sum(C.square(x), axis=axis[0])) return x / norm def hard_sigmoid(x): x = (0.2 * x) + 0.5 x = C.clip(x, 0.0, 1.0) return x def conv1d(x, kernel, strides=1, padding='valid', data_format=None, dilation_rate=1): data_format = normalize_data_format(data_format) if padding == 'causal': # causal (dilated) convolution: left_pad = dilation_rate * (kernel.shape[0] - 1) x = temporal_padding(x, (left_pad, 0)) padding = 'valid' if data_format == 'channels_last': x = C.swapaxes(x, 0, 1) # As of Keras 2.0.0, all kernels are normalized # on the format `(steps, input_depth, depth)`, # independently of `data_format`. # CNTK expects `(depth, input_depth, steps)`. kernel = C.swapaxes(kernel, 0, 2) padding = _preprocess_border_mode(padding) if dev.type() == 0 and dilation_rate != 1: raise ValueError('Dilated convolution on CPU is not supported by CNTK backend. ' 'Please set `dilation_rate` to 1. 
You passed: %s' % (dilation_rate,)) dilation_rate = (1, dilation_rate) x = C.convolution( kernel, x, strides=strides, auto_padding=[False, padding], dilation=dilation_rate) if data_format == 'channels_last': x = C.swapaxes(x, 0, 1) return x def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1)): data_format = normalize_data_format(data_format) x = _preprocess_conv2d_input(x, data_format) kernel = _preprocess_conv2d_kernel(kernel, data_format) padding = _preprocess_border_mode(padding) if dev.type() == 0 and dilation_rate != (1, 1): raise ValueError('Dilated convolution on CPU is not supported by CNTK backend. ' 'Please set `dilation_rate` to (1, 1). ' 'You passed: %s' % (dilation_rate,)) dilation_rate = (1,) + dilation_rate x = C.convolution(kernel, x, strides, auto_padding=[False, padding, padding], dilation=dilation_rate) return _postprocess_conv2d_output(x, data_format) def separable_conv1d(x, depthwise_kernel, pointwise_kernel, strides=1, padding='valid', data_format=None, dilation_rate=1): data_format = normalize_data_format(data_format) if isinstance(strides, int): strides = (strides,) if isinstance(dilation_rate, int): dilation_rate = (dilation_rate,) if data_format == 'channels_last': spatial_start_dim = 2 else: spatial_start_dim = 3 x = expand_dims(x, spatial_start_dim) depthwise_kernel = expand_dims(depthwise_kernel, 1) pointwise_kernel = expand_dims(pointwise_kernel, 1) strides = (1,) + strides + (1,) dilation_rate = (1,) + dilation_rate x = _preprocess_conv2d_input(x, data_format) depthwise_kernel = _preprocess_conv2d_kernel(depthwise_kernel, data_format) depthwise_kernel = C.reshape(C.transpose(depthwise_kernel, (1, 0, 2, 3)), (-1, 1) + depthwise_kernel.shape[2:]) pointwise_kernel = _preprocess_conv2d_kernel(pointwise_kernel, data_format) padding = _preprocess_border_mode(padding) if dilation_rate == (1, 1): x = C.convolution(depthwise_kernel, x, strides=strides, auto_padding=[False, padding, padding], 
def depthwise_conv2d(x, depthwise_kernel, strides=(1, 1), padding='valid',
                     data_format=None, dilation_rate=(1, 1)):
    """Applies a depthwise 2D convolution using a grouped `C.convolution`.

    # Arguments
        x: input tensor.
        depthwise_kernel: kernel tensor for the depthwise convolution.
        strides: strides tuple.
        padding: `'same'` or `'valid'`.
        data_format: data format string, normalized with
            `normalize_data_format`.
        dilation_rate: tuple of two values.

    # Returns
        The convolution output, passed through
        `_postprocess_conv2d_output`.

    # Raises
        ValueError: when `dilation_rate` is not `(1, 1)` and either its
            two entries differ or `strides` is not `(1, 1)`.
    """
    data_format = normalize_data_format(data_format)
    inputs = _preprocess_conv2d_input(x, data_format)

    kernel = _preprocess_conv2d_kernel(depthwise_kernel, data_format)
    # Swap the first two kernel axes and fold them into a leading
    # axis of size -1 followed by a singleton axis.
    grouped_shape = (-1, 1) + kernel.shape[2:]
    kernel = C.reshape(C.transpose(kernel, (1, 0, 2, 3)), grouped_shape)

    auto_pad = _preprocess_border_mode(padding)

    if dilation_rate == (1, 1):
        conv_strides = (1,) + strides
    else:
        if dilation_rate[0] != dilation_rate[1]:
            raise ValueError('CNTK Backend: non-square dilation_rate is '
                             'not supported.')
        if strides != (1, 1):
            raise ValueError('Invalid strides for dilated convolution')
        # NOTE(review): the dilation value is forwarded as `strides`,
        # exactly as before - confirm this is intended.
        conv_strides = dilation_rate[0]

    result = C.convolution(kernel,
                           inputs,
                           strides=conv_strides,
                           auto_padding=[False, auto_pad, auto_pad],
                           groups=inputs.shape[0])
    return _postprocess_conv2d_output(result, data_format)
def pool2d(x, pool_size, strides=(1, 1),
           padding='valid', data_format=None,
           pool_mode='max'):
    """Applies 2D max or average pooling through `C.pooling`.

    # Arguments
        x: input tensor.
        pool_size: pooling window, forwarded unchanged to `C.pooling`.
        strides: strides, forwarded unchanged to `C.pooling`.
        padding: `'same'` or `'valid'`.
        data_format: data format string, normalized with
            `normalize_data_format`.
        pool_mode: `'max'` or `'avg'`.

    # Returns
        The pooled tensor, passed through `_postprocess_conv2d_output`.

    # Raises
        ValueError: if `pool_mode` is neither `'max'` nor `'avg'`.
    """
    data_format = normalize_data_format(data_format)
    auto_pad = _preprocess_border_mode(padding)
    inputs = _preprocess_conv2d_input(x, data_format)

    # The mode is validated only after the input was preprocessed,
    # which keeps the original order of failures.
    if pool_mode not in ('max', 'avg'):
        raise ValueError('Invalid pooling mode: ' + str(pool_mode))

    pooling_type = C.MAX_POOLING if pool_mode == 'max' else C.AVG_POOLING
    pooled = C.pooling(inputs,
                       pooling_type,
                       pool_size,
                       strides,
                       auto_padding=[auto_pad])
    return _postprocess_conv2d_output(pooled, data_format)
def dropout(x, level, noise_shape=None, seed=None):
    """Applies dropout to `x` through `C.dropout`.

    # Arguments
        x: input tensor.
        level: dropout level; must satisfy `0 <= level < 1`.
        noise_shape: accepted for signature compatibility;
            this implementation does not use it.
        seed: accepted for signature compatibility;
            this implementation does not use it.

    # Returns
        The result of `C.dropout(x, level)`.

    # Raises
        ValueError: if `level` is negative or not below 1.
    """
    if level < 0. or level >= 1:
        # The upper bound is exclusive, so the message reports the
        # half-open interval that the check actually enforces.
        raise ValueError('CNTK Backend: Invalid dropout level %s, '
                         'must be in interval [0, 1).' % (level,))
    return C.dropout(x, level)
def sparse_categorical_crossentropy(target, output, from_logits=False,
                                    axis=-1):
    """Categorical crossentropy with integer targets.

    The targets are one-hot encoded along the channels axis, reshaped to
    the shape of `output`, and handed to `categorical_crossentropy`.

    # Arguments
        target: tensor of class indices.
        output: tensor of predictions (or logits).
        from_logits: forwarded to `categorical_crossentropy`.
        axis: channels axis, counted with the batch dimension included;
            `-1` selects the last axis.

    # Returns
        The result of `categorical_crossentropy`.

    # Raises
        ValueError: if the batch-less channels axis is neither `-1`
            nor one of the axes of `output`.
    """
    # Tensors here carry no batch dimension, so any axis other than
    # the default -1 is shifted down by one.
    channel_axis = -1 if axis == -1 else axis - 1
    rank = len(output.shape)

    if channel_axis != -1 and channel_axis not in range(rank):
        raise ValueError(
            'Unexpected channels axis {}. '
            'Expected to be -1 or one of the axes of `output`, '
            'which has {} dimensions.'.format(channel_axis, rank))

    num_classes = output.shape[channel_axis]
    dense_target = C.one_hot(target, num_classes, axis=channel_axis)
    dense_target = C.reshape(dense_target, output.shape)
    return categorical_crossentropy(dense_target, output, from_logits,
                                    axis=axis)
% g) if len(u_list) > 0: learner = C.cntk_py.universal_learner(p_list, u_list, update_func) criterion = ( outputs[0], outputs[1]) if len(outputs) > 1 else ( outputs[0], ) self.trainer = C.trainer.Trainer( outputs[0], criterion, [learner]) self.trainer_output = tuple([f.output for f in criterion]) elif len(u_ops) > 0: unrelated_updates.extend(u_ops) if len(unrelated_updates) > 0: self.unrelated_updates = C.combine([_.output for _ in unrelated_updates]) if self.trainer is None: self.metrics_outputs = [f.output for f in outputs] self.metrics_func = C.combine(self.metrics_outputs) # cntk only could handle loss and 1 metric in trainer, for metrics more # than 2, need manual eval elif len(outputs) > 2: self.metrics_outputs = [f.output for f in outputs[2:]] self.metrics_func = C.combine(self.metrics_outputs) else: self.metrics_func = None @staticmethod def _is_input_shape_compatible(input, placeholder): if hasattr(input, 'shape') and hasattr(placeholder, 'shape'): num_dynamic = get_num_dynamic_axis(placeholder) input_shape = input.shape[num_dynamic:] placeholder_shape = placeholder.shape for i, p in zip(input_shape, placeholder_shape): if i != p and p != C.InferredDimension and p != C.FreeDimension: return False return True def __call__(self, inputs): global _LEARNING_PHASE_PLACEHOLDER global _LEARNING_PHASE assert isinstance(inputs, (list, tuple)) feed_dict = {} for tensor, value in zip(self.placeholders, inputs): # cntk only support calculate on float, do auto cast here if (hasattr(value, 'dtype') and value.dtype != np.float32 and value.dtype != np.float64): value = value.astype(np.float32) if tensor == _LEARNING_PHASE_PLACEHOLDER: _LEARNING_PHASE_PLACEHOLDER.value = np.asarray(value) else: # in current version cntk can't support input with variable # length. Will support it in next release. if not self._is_input_shape_compatible(value, tensor): raise ValueError('CNTK backend: The placeholder has been resolved ' 'to shape `%s`, but input shape is `%s`. 
Currently ' 'CNTK can not take variable length inputs. Please ' 'pass inputs that have a static shape.' % (str(tensor.shape), str(value.shape))) feed_dict[tensor] = value updated = [] if self.trainer is not None: input_dict = {} for argument in self.loss.arguments: if argument in feed_dict: input_dict[argument] = feed_dict[argument] else: raise ValueError( 'CNTK backend: argument %s is not found in inputs. ' 'Please double check the model and inputs in ' '`train_function`.' % argument.name) result = self.trainer.train_minibatch( input_dict, self.trainer_output) assert(len(result) == 2) outputs = result[1] for o in self.trainer_output: updated.append(outputs[o]) if self.metrics_func is not None: input_dict = {} for argument in self.metrics_func.arguments: if argument in feed_dict: input_dict[argument] = feed_dict[argument] else: raise ValueError('CNTK backend: metrics argument %s ' 'is not found in inputs. Please double ' 'check the model and inputs.' % argument.name) # Some ops (like dropout) won't be applied during "eval" in cntk. # They only evaluated in training phase. To make it work, call # "forward" method to let cntk know we want to evaluate them.from # But the assign ops won't be executed under this mode, that's why # we need this check. 
def temporal_padding(x, padding=(1, 1)):
    """Pads a tensor by delegating to `pad` with one padding pair.

    # Arguments
        x: input tensor; its static rank must be 3, or 2 when it has
            at least one dynamic axis.
        padding: sequence of 2 values, passed to `pad` as a single
            padding pair.

    # Returns
        The result of `pad(x, [padding], 'channels_last', ...)`.

    # Raises
        AssertionError: if `padding` does not have 2 entries or the
            rank of `x` is not the expected one.
    """
    assert len(padding) == 2
    dynamic_axes = _get_dynamic_axis_num(x)
    # One axis is not part of `x.shape` when dynamic axes are present.
    missing_axes = 1 if dynamic_axes > 0 else 0
    assert len(x.shape) == 3 - missing_axes
    return pad(x, [padding], 'channels_last', dynamic_axes)
def pad(x, pad_info, data_format, num_dynamic_axis):
    """Pads `x` according to `pad_info`.

    Uses `C.pad` when the CNTK module provides it and otherwise falls
    back to applying `_padding` once per entry of `pad_info`.

    # Arguments
        x: input tensor.
        pad_info: iterable of padding pairs.
        data_format: `'channels_first'` puts the unpadded `[0, 0]`
            entry before the padding pairs; any other value puts
            it after them.
        num_dynamic_axis: number of dynamic axes of `x`; when it is 0
            an extra leading `[0, 0]` entry (or axis offset) is added.

    # Returns
        The padded tensor.
    """
    channels_first = data_format == 'channels_first'
    no_dynamic_axis = num_dynamic_axis == 0

    if not hasattr(C, 'pad'):  # pragma: no cover
        # Axis offset: one for the leading static axis (if any) and
        # one for a leading channels axis (if any).
        offset = (1 if no_dynamic_axis else 0) + (1 if channels_first else 0)
        for index, amount in enumerate(pad_info):
            x = _padding(x, amount, index + offset)
        return x

    pattern = [list(pair) for pair in pad_info]
    if channels_first:
        pattern.insert(0, [0, 0])
    else:
        pattern.append([0, 0])
    if no_dynamic_axis:
        pattern.insert(0, [0, 0])
    return C.pad(x, pattern=pattern)
def stop_gradient(variables):
    """Wraps `variables` with `C.stop_gradient`.

    # Arguments
        variables: a single tensor, or a list/tuple of tensors.

    # Returns
        For a list or tuple, a list holding `C.stop_gradient` of each
        entry; otherwise `C.stop_gradient(variables)`.
    """
    if isinstance(variables, (list, tuple)):
        # Build a real list: on Python 3 `map` is a lazy one-shot
        # iterator, so callers could neither index the result nor
        # iterate it twice.
        return [C.stop_gradient(variable) for variable in variables]
    return C.stop_gradient(variables)
def conv2d_transpose(x, kernel, output_shape, strides=(1, 1),
                     padding='valid', data_format=None, dilation_rate=(1, 1)):
    """Applies a 2D transposed convolution via `C.convolution_transpose`.

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        output_shape: output shape including the batch axis; the first
            entry is dropped before calling CNTK.
        strides: strides tuple.
        padding: `'same'` or `'valid'`.
        data_format: data format string, normalized with
            `normalize_data_format`.
        dilation_rate: dilation tuple.

    # Returns
        The result, passed through `_postprocess_conv2d_output`.
    """
    data_format = normalize_data_format(data_format)

    inputs = _preprocess_conv2d_input(x, data_format)
    filters = _preprocess_conv2d_kernel(kernel, data_format)
    auto_pad = _preprocess_border_mode(padding)
    cntk_strides = (1,) + strides

    # cntk output_shape does not include batch axis
    static_shape = output_shape[1:]
    # in keras2, the output shape has to be reordered for channels_last
    if data_format == 'channels_last':
        static_shape = transpose_shape(static_shape,
                                       'channels_first',
                                       spatial_axes=(0, 1))

    cntk_dilation = (1,) + dilation_rate
    result = C.convolution_transpose(
        filters,
        inputs,
        cntk_strides,
        auto_padding=[False, auto_pad, auto_pad],
        output_shape=static_shape,
        dilation=cntk_dilation)
    return _postprocess_conv2d_output(result, data_format)
# TH input shape: (samples, input_depth, rows, cols) # TF input shape: (samples, rows, cols, input_depth) x = C.transpose(x, (2, 0, 1)) return x def _preprocess_conv2d_kernel(kernel, data_format): # As of Keras 2.0.0, all kernels are normalized # on the format `(rows, cols, input_depth, depth)`, # independently of `data_format`. # CNTK expects `(depth, input_depth, rows, cols)`. kernel = C.transpose(kernel, (3, 2, 0, 1)) return kernel def _preprocess_border_mode(padding): if padding == 'same': padding = True elif padding == 'valid': padding = False else: raise ValueError('Invalid border mode: ' + str(padding)) return padding def _postprocess_conv2d_output(x, data_format): if data_format == 'channels_last': x = C.transpose(x, (1, 2, 0)) return x def _preprocess_conv3d_input(x, data_format): if data_format == 'channels_last': # TF uses the last dimension as channel dimension, # instead of the 2nd one. # TH input shape: (samples, input_depth, conv_dim1, conv_dim2, conv_dim3) # TF input shape: (samples, conv_dim1, conv_dim2, conv_dim3, # input_depth) x = C.transpose(x, (3, 0, 1, 2)) return x def _preprocess_conv3d_kernel(kernel, dim_ordering): kernel = C.transpose(kernel, (4, 3, 0, 1, 2)) return kernel def _postprocess_conv3d_output(x, dim_ordering): if dim_ordering == 'channels_last': x = C.transpose(x, (1, 2, 3, 0)) return x def _get_dynamic_axis_num(x): if hasattr(x, 'dynamic_axes'): return len(x.dynamic_axes) else: return 0 def _contain_seqence_axis(x): if _get_dynamic_axis_num(x) > 1: return x.dynamic_axes[1] == C.Axis.default_dynamic_axis() else: return False def get_num_dynamic_axis(x): return _get_dynamic_axis_num(x) def _reduce_on_axis(x, axis, reduce_fun_name): if isinstance(axis, list): for a in axis: if isinstance(a, C.Axis) \ and a != C.Axis.default_batch_axis() \ and hasattr(C.sequence, reduce_fun_name): x = getattr(C.sequence, reduce_fun_name)(x, a) else: x = getattr(C, reduce_fun_name)(x, a) else: x = getattr(C, reduce_fun_name)(x, axis) return x def 
def local_conv1d(inputs, kernel, kernel_size, strides, data_format=None):
    """Applies a 1D convolution with un-shared weights.

    # Arguments
        inputs: input tensor, sliced along its second axis.
        kernel: kernel tensor whose `int_shape` unpacks into
            `(output_length, feature_dim, filters)`.
        kernel_size: sequence; only the first entry is used.
        strides: sequence; only the first entry is used.
        data_format: validated through `normalize_data_format`;
            not used otherwise.

    # Returns
        The locally connected output; per the original annotation its
        shape is `(batch, output_length, filters)`.
    """
    data_format = normalize_data_format(data_format)

    step = strides[0]
    window = kernel_size[0]
    output_length, feature_dim, filters = int_shape(kernel)

    # One flattened input window per output position.
    patches = [
        reshape(inputs[:, py_slice(pos * step, pos * step + window), :],
                (-1, 1, feature_dim))
        for pos in range(output_length)
    ]
    stacked = concatenate(patches, axis=1)

    # Put the filters axis of the kernel first so that the product
    # below broadcasts.
    weight = permute_dimensions(kernel, (2, 0, 1))
    product = stacked * weight
    reduced = sum(product, axis=3)
    return permute_dimensions(reduced, (0, 2, 1))
def _get_cntk_version():
    """Returns the installed CNTK version as a float.

    A trailing `'+'` is dropped from `C.__version__`, and when the
    second character is a dot every later dot is removed as well
    (so `'2.3.1'` is parsed as `2.31`).

    # Returns
        The parsed version, or `2.0` (after emitting a warning) when
        the cleaned-up string is not a valid float.
    """
    version = C.__version__
    if version.endswith('+'):
        version = version[:-1]
    # for hot fix, ignore all the . except the first one.
    if len(version) > 2 and version[1] == '.':
        version = version[:2] + version[2:].replace('.', '')
    try:
        return float(version)
    except ValueError:
        # Only a malformed version string is expected here; a bare
        # `except` would also swallow KeyboardInterrupt/SystemExit.
        warnings.warn(
            'CNTK backend warning: CNTK version not detected. '
            'Will using CNTK 2.0 GA as default.')
        return float(2.0)
class LambdaFunc(C.ops.functions.UserFunction):
    """User function that optionally runs a callback and passes its input on.

    # Arguments
        arg: the input variable.
        when: callable evaluated on the forward argument; the callback
            runs only when it returns a truthy value.
        execute: callback invoked with the forward argument.
        name: name of this node.
    """

    def __init__(self, arg,
                 when=lambda arg: True,
                 execute=lambda arg: print(arg),
                 name=''):
        # The callbacks are stored before the base initializer runs.
        self.when, self.execute = when, execute
        super(LambdaFunc, self).__init__([arg], name=name)

    def infer_outputs(self):
        # The output mirrors the shape, dtype and dynamic axes
        # of the single input.
        source = self.inputs[0]
        return [C.output_variable(source.shape,
                                  source.dtype,
                                  source.dynamic_axes)]

    def forward(self, argument, device=None, outputs_to_retain=None):
        should_run = self.when(argument)
        if should_run:
            self.execute(argument)
        # No state is kept for the backward pass.
        return None, argument

    def backward(self, state, root_gradients):
        # Gradients flow through unchanged.
        return root_gradients
def get(identifier):
    """Resolves a regularizer identifier.

    # Arguments
        identifier: `None`, a config dict, a string name,
            or a callable.

    # Returns
        `None` for `None`; the deserialized object for a dict or a
        string (a string is wrapped into a config with an empty
        `'config'` entry first); the identifier itself if it is
        callable.

    # Raises
        ValueError: if the identifier is none of the above.
    """
    if identifier is None:
        return None
    if isinstance(identifier, dict):
        return deserialize(identifier)
    if isinstance(identifier, six.string_types):
        return deserialize({'class_name': str(identifier),
                            'config': {}})
    if not callable(identifier):
        raise ValueError('Could not interpret regularizer identifier: ' +
                         str(identifier))
    return identifier
def clip_norm(g, c, n):
    """Clip the gradient `g` if the L2 norm `n` exceeds `c`.

    # Arguments
        g: Tensor, the gradient tensor
        c: float >= 0. Gradients will be clipped
            when their L2 norm exceeds this value.
        n: Tensor, actual norm of `g`.

    # Returns
        Tensor, the gradient clipped if required.
    """
    if c <= 0:
        # Clipping disabled: do not add any op to the graph.
        return g

    if K.backend() != 'tensorflow':
        return K.switch(K.greater_equal(n, c), g * c / n, g)

    # TensorFlow needs a dedicated op to scale IndexedSlices by a scalar.
    exceeds = n >= c
    scaled = tf.scalar_mul(c / n, g)
    unscaled = g

    # Remember the shape so that a sparse gradient is not
    # converted to a dense one.
    is_dense = isinstance(scaled, tf.Tensor)
    is_sparse = (not is_dense) and isinstance(scaled, tf.IndexedSlices)
    if is_dense:
        saved_shape = copy.copy(scaled.get_shape())
    elif is_sparse:
        saved_shape = copy.copy(scaled.dense_shape)

    if exceeds.dtype != tf.bool:
        exceeds = tf.cast(exceeds, 'bool')
    g = tf.cond(exceeds,
                lambda: scaled,
                lambda: unscaled)

    if is_dense:
        g.set_shape(saved_shape)
    elif is_sparse:
        g._dense_shape = saved_shape
    return g
def get_gradients(self, loss, params):
    """Returns the gradients of `loss` with respect to `params`.

    The gradients are clipped by global norm when the optimizer has a
    positive `clipnorm` attribute, and then by value when it has a
    positive `clipvalue` attribute.

    # Arguments
        loss: loss tensor.
        params: list of variables.

    # Returns
        List of gradient tensors.

    # Raises
        ValueError: if one of the gradients is `None`.
    """
    grads = K.gradients(loss, params)
    if None in grads:
        message = ('An operation has `None` for gradient. '
                   'Please make sure that all of your ops have a '
                   'gradient defined (i.e. are differentiable). '
                   'Common ops without gradient: '
                   'K.argmax, K.round, K.eval.')
        raise ValueError(message)

    if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        # Global L2 norm over all gradients.
        squared_sums = [K.sum(K.square(grad)) for grad in grads]
        norm = K.sqrt(sum(squared_sums))
        grads = [clip_norm(grad, self.clipnorm, norm) for grad in grads]

    if hasattr(self, 'clipvalue') and self.clipvalue > 0:
        low, high = -self.clipvalue, self.clipvalue
        grads = [K.clip(grad, low, high) for grad in grads]

    return grads
def get_config(self):
    """Returns the clipping settings that are set on this optimizer.

    # Returns
        A dict holding `'clipnorm'` and/or `'clipvalue'`, each present
        only when the attribute of that name exists on the instance.
    """
    return {key: getattr(self, key)
            for key in ('clipnorm', 'clipvalue')
            if hasattr(self, key)}
""" def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, **kwargs): super(SGD, self).__init__(**kwargs) with K.name_scope(self.__class__.__name__): self.iterations = K.variable(0, dtype='int64', name='iterations') self.lr = K.variable(lr, name='lr') self.momentum = K.variable(momentum, name='momentum') self.decay = K.variable(decay, name='decay') self.initial_decay = decay self.nesterov = nesterov @interfaces.legacy_get_updates_support def get_updates(self, loss, params): grads = self.get_gradients(loss, params) self.updates = [K.update_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, K.dtype(self.decay)))) # momentum shapes = [K.int_shape(p) for p in params] moments = [K.zeros(shape) for shape in shapes] self.weights = [self.iterations] + moments for p, g, m in zip(params, grads, moments): v = self.momentum * m - lr * g # velocity self.updates.append(K.update(m, v)) if self.nesterov: new_p = p + self.momentum * v - lr * g else: new_p = p + v # Apply constraints. if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) self.updates.append(K.update(p, new_p)) return self.updates def get_config(self): config = {'lr': float(K.get_value(self.lr)), 'momentum': float(K.get_value(self.momentum)), 'decay': float(K.get_value(self.decay)), 'nesterov': self.nesterov} base_config = super(SGD, self).get_config() return dict(list(base_config.items()) + list(config.items())) class RMSprop(Optimizer): """RMSProp optimizer. It is recommended to leave the parameters of this optimizer at their default values (except the learning rate, which can be freely tuned). This optimizer is usually a good choice for recurrent neural networks. # Arguments lr: float >= 0. Learning rate. rho: float >= 0. epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. decay: float >= 0. Learning rate decay over each update. 
# References - [rmsprop: Divide the gradient by a running average of its recent magnitude] (http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) """ def __init__(self, lr=0.001, rho=0.9, epsilon=None, decay=0., **kwargs): super(RMSprop, self).__init__(**kwargs) with K.name_scope(self.__class__.__name__): self.lr = K.variable(lr, name='lr') self.rho = K.variable(rho, name='rho') self.decay = K.variable(decay, name='decay') self.iterations = K.variable(0, dtype='int64', name='iterations') if epsilon is None: epsilon = K.epsilon() self.epsilon = epsilon self.initial_decay = decay @interfaces.legacy_get_updates_support def get_updates(self, loss, params): grads = self.get_gradients(loss, params) accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] self.weights = accumulators self.updates = [K.update_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, K.dtype(self.decay)))) for p, g, a in zip(params, grads, accumulators): # update accumulator new_a = self.rho * a + (1. - self.rho) * K.square(g) self.updates.append(K.update(a, new_a)) new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon) # Apply constraints. if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) self.updates.append(K.update(p, new_p)) return self.updates def get_config(self): config = {'lr': float(K.get_value(self.lr)), 'rho': float(K.get_value(self.rho)), 'decay': float(K.get_value(self.decay)), 'epsilon': self.epsilon} base_config = super(RMSprop, self).get_config() return dict(list(base_config.items()) + list(config.items())) class Adagrad(Optimizer): """Adagrad optimizer. Adagrad is an optimizer with parameter-specific learning rates, which are adapted relative to how frequently a parameter gets updated during training. The more updates a parameter receives, the smaller the updates. It is recommended to leave the parameters of this optimizer at their default values. 
# Arguments lr: float >= 0. Initial learning rate. epsilon: float >= 0. If `None`, defaults to `K.epsilon()`. decay: float >= 0. Learning rate decay over each update. # References - [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) """ def __init__(self, lr=0.01, epsilon=None, decay=0., **kwargs): super(Adagrad, self).__init__(**kwargs) with K.name_scope(self.__class__.__name__): self.lr = K.variable(lr, name='lr') self.decay = K.variable(decay, name='decay') self.iterations = K.variable(0, dtype='int64', name='iterations') if epsilon is None: epsilon = K.epsilon() self.epsilon = epsilon self.initial_decay = decay @interfaces.legacy_get_updates_support def get_updates(self, loss, params): grads = self.get_gradients(loss, params) shapes = [K.int_shape(p) for p in params] accumulators = [K.zeros(shape) for shape in shapes] self.weights = accumulators self.updates = [K.update_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, K.dtype(self.decay)))) for p, g, a in zip(params, grads, accumulators): new_a = a + K.square(g) # update accumulator self.updates.append(K.update(a, new_a)) new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon) # Apply constraints. if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) self.updates.append(K.update(p, new_p)) return self.updates def get_config(self): config = {'lr': float(K.get_value(self.lr)), 'decay': float(K.get_value(self.decay)), 'epsilon': self.epsilon} base_config = super(Adagrad, self).get_config() return dict(list(base_config.items()) + list(config.items())) class Adadelta(Optimizer): """Adadelta optimizer. Adadelta is a more robust extension of Adagrad that adapts learning rates based on a moving window of gradient updates, instead of accumulating all past gradients. This way, Adadelta continues learning even when many updates have been done. 
Compared to Adagrad, in the original version of Adadelta you don't have to
    set an initial learning rate. In this version, initial
    learning rate and decay factor can be set, as in most other Keras optimizers.

    It is recommended to leave the parameters of this optimizer
    at their default values.

    # Arguments
        lr: float >= 0. Initial learning rate, defaults to 1.
            It is recommended to leave it at the default value.
        rho: float >= 0. Adadelta decay factor, corresponding to fraction of
            gradient to keep at each time step.
        epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
        decay: float >= 0. Initial learning rate decay.

    # References
        - [Adadelta - an adaptive learning rate method]
          (https://arxiv.org/abs/1212.5701)
    """

    def __init__(self, lr=1.0, rho=0.95, epsilon=None, decay=0.,
                 **kwargs):
        super(Adadelta, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            self.lr = K.variable(lr, name='lr')
            self.decay = K.variable(decay, name='decay')
            self.iterations = K.variable(0, dtype='int64', name='iterations')
        if epsilon is None:
            epsilon = K.epsilon()
        # `rho` and `epsilon` are kept as plain Python values, not backend
        # variables.
        self.rho = rho
        self.epsilon = epsilon
        self.initial_decay = decay

    @interfaces.legacy_get_updates_support
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        shapes = [K.int_shape(p) for p in params]
        accumulators = [K.zeros(shape) for shape in shapes]
        delta_accumulators = [K.zeros(shape) for shape in shapes]
        # NOTE: `self.iterations` is not part of `self.weights` here.
        self.weights = accumulators + delta_accumulators
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            # Effective rate: lr / (1 + decay * iterations).
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))

            # use the new accumulator and the *old* delta_accumulator
            update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)
            new_p = p - lr * update

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

            # update delta_accumulator
            new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
            self.updates.append(K.update(d_a, new_d_a))
        return self.updates

    def get_config(self):
        config = {'lr': float(K.get_value(self.lr)),
                  'rho': self.rho,
                  'decay': float(K.get_value(self.decay)),
                  'epsilon': self.epsilon}
        base_config = super(Adadelta, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


class Adam(Optimizer):
    """Adam optimizer.

    Default parameters follow those provided in the original paper.

    # Arguments
        lr: float >= 0. Learning rate.
        beta_1: float, 0 < beta < 1. Generally close to 1.
        beta_2: float, 0 < beta < 1. Generally close to 1.
        epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
        decay: float >= 0. Learning rate decay over each update.
        amsgrad: boolean. Whether to apply the AMSGrad variant of this
            algorithm from the paper "On the Convergence of Adam and
            Beyond".

    # References
        - [Adam - A Method for Stochastic Optimization]
          (https://arxiv.org/abs/1412.6980v8)
        - [On the Convergence of Adam and Beyond]
          (https://openreview.net/forum?id=ryQu7f-RZ)
    """

    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=None, decay=0., amsgrad=False, **kwargs):
        super(Adam, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')
        if epsilon is None:
            epsilon = K.epsilon()
        self.epsilon = epsilon
        # Python-side copy, used to decide whether the decay term is built.
        self.initial_decay = decay
        self.amsgrad = amsgrad

    @interfaces.legacy_get_updates_support
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        # The first update op always increments the step counter.
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1.
+ self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        # `t` is the 1-based step index used in the bias-correction terms.
        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            # Size-1 placeholders: `self.weights` has the same number of
            # entries whether or not `amsgrad` is enabled.
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                # AMSGrad: divide by the running element-wise maximum of v_t.
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates

    def get_config(self):
        # Backend variables are read back as plain Python floats.
        config = {'lr': float(K.get_value(self.lr)),
                  'beta_1': float(K.get_value(self.beta_1)),
                  'beta_2': float(K.get_value(self.beta_2)),
                  'decay': float(K.get_value(self.decay)),
                  'epsilon': self.epsilon,
                  'amsgrad': self.amsgrad}
        base_config = super(Adam, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


class Adamax(Optimizer):
    """Adamax optimizer from Adam paper's Section 7.

    It is a variant of Adam based on the infinity norm.
    Default parameters follow those provided in the paper.

    # Arguments
        lr: float >= 0. Learning rate.
        beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
        epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
        decay: float >= 0. Learning rate decay over each update.
# References
        - [Adam - A Method for Stochastic Optimization]
          (https://arxiv.org/abs/1412.6980v8)
    """

    def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
                 epsilon=None, decay=0., **kwargs):
        super(Adamax, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')
        if epsilon is None:
            epsilon = K.epsilon()
        self.epsilon = epsilon
        # Python-side copy, used to decide whether the decay term is built.
        self.initial_decay = decay

    @interfaces.legacy_get_updates_support
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            # Effective rate: lr / (1 + decay * iterations).
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        # Only the first moment is bias-corrected here.
        lr_t = lr / (1. - K.pow(self.beta_1, t))

        shapes = [K.int_shape(p) for p in params]
        # zero init of 1st moment
        ms = [K.zeros(shape) for shape in shapes]
        # zero init of exponentially weighted infinity norm
        us = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + us

        for p, g, m, u in zip(params, grads, ms, us):

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            u_t = K.maximum(self.beta_2 * u, K.abs(g))
            p_t = p - lr_t * m_t / (u_t + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(u, u_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates

    def get_config(self):
        config = {'lr': float(K.get_value(self.lr)),
                  'beta_1': float(K.get_value(self.beta_1)),
                  'beta_2': float(K.get_value(self.beta_2)),
                  'decay': float(K.get_value(self.decay)),
                  'epsilon': self.epsilon}
        base_config = super(Adamax, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


class Nadam(Optimizer):
    """Nesterov Adam optimizer.

    Much like Adam is essentially RMSprop with momentum,
    Nadam is Adam with Nesterov momentum.

    Default parameters follow those provided in the paper.
    It is recommended to leave the parameters of this optimizer
    at their default values.

    # Arguments
        lr: float >= 0. Learning rate.
        beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
        epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
        schedule_decay: float. Exponent scale of the momentum warm-up
            schedule used in `get_updates`.

    # References
        - [Nadam report](http://cs229.stanford.edu/proj2015/054_report.pdf)
        - [On the importance of initialization and momentum in deep learning]
          (http://www.cs.toronto.edu/~fritz/absps/momentum.pdf)
    """

    def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
                 epsilon=None, schedule_decay=0.004, **kwargs):
        super(Nadam, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            # Running product of the per-step momentum coefficients.
            self.m_schedule = K.variable(1., name='m_schedule')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
        if epsilon is None:
            epsilon = K.epsilon()
        self.epsilon = epsilon
        self.schedule_decay = schedule_decay

    @interfaces.legacy_get_updates_support
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (1.
- 0.5 * ( K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * ( K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) m_schedule_new = self.m_schedule * momentum_cache_t m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 self.updates.append((self.m_schedule, m_schedule_new)) shapes = [K.int_shape(p) for p in params] ms = [K.zeros(shape) for shape in shapes] vs = [K.zeros(shape) for shape in shapes] self.weights = [self.iterations] + ms + vs for p, g, m, v in zip(params, grads, ms, vs): # the following equations given in [1] g_prime = g / (1. - m_schedule_new) m_t = self.beta_1 * m + (1. - self.beta_1) * g m_t_prime = m_t / (1. - m_schedule_next) v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g) v_t_prime = v_t / (1. - K.pow(self.beta_2, t)) m_t_bar = (1. - momentum_cache_t) * g_prime + ( momentum_cache_t_1 * m_t_prime) self.updates.append(K.update(m, m_t)) self.updates.append(K.update(v, v_t)) p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon) new_p = p_t # Apply constraints. if getattr(p, 'constraint', None) is not None: new_p = p.constraint(new_p) self.updates.append(K.update(p, new_p)) return self.updates def get_config(self): config = {'lr': float(K.get_value(self.lr)), 'beta_1': float(K.get_value(self.beta_1)), 'beta_2': float(K.get_value(self.beta_2)), 'epsilon': self.epsilon, 'schedule_decay': self.schedule_decay} base_config = super(Nadam, self).get_config() return dict(list(base_config.items()) + list(config.items())) class TFOptimizer(Optimizer): """Wrapper class for native TensorFlow optimizers. 
""" def __init__(self, optimizer): self.optimizer = optimizer with K.name_scope(self.__class__.__name__): self.iterations = K.variable(0, dtype='int64', name='iterations') @interfaces.legacy_get_updates_support def get_updates(self, loss, params): grads = self.optimizer.compute_gradients(loss, params) self.updates = [K.update_add(self.iterations, 1)] opt_update = self.optimizer.apply_gradients( grads, global_step=self.iterations) self.updates.append(opt_update) return self.updates @property def weights(self): raise NotImplementedError def get_config(self): raise NotImplementedError def from_config(self, config): raise NotImplementedError # Aliases. sgd = SGD rmsprop = RMSprop adagrad = Adagrad adadelta = Adadelta adam = Adam adamax = Adamax nadam = Nadam def serialize(optimizer): return serialize_keras_object(optimizer) def deserialize(config, custom_objects=None): """Inverse of the `serialize` function. # Arguments config: Optimizer configuration dictionary. custom_objects: Optional dictionary mapping names (strings) to custom objects (classes and functions) to be considered during deserialization. # Returns A Keras Optimizer instance. """ all_classes = { 'sgd': SGD, 'rmsprop': RMSprop, 'adagrad': Adagrad, 'adadelta': Adadelta, 'adam': Adam, 'adamax': Adamax, 'nadam': Nadam, 'tfoptimizer': TFOptimizer, } # Make deserialization case-insensitive for built-in optimizers. if config['class_name'].lower() in all_classes: config['class_name'] = config['class_name'].lower() return deserialize_keras_object(config, module_objects=all_classes, custom_objects=custom_objects, printable_module_name='optimizer') def get(identifier): """Retrieves a Keras Optimizer instance. # Arguments identifier: Optimizer identifier, one of - String: name of an optimizer - Dictionary: configuration dictionary. - Keras Optimizer instance (it will be returned unchanged). - TensorFlow Optimizer instance (it will be wrapped as a Keras Optimizer). # Returns A Keras Optimizer instance. 
# Raises ValueError: If `identifier` cannot be interpreted. """ if K.backend() == 'tensorflow': # Wrap TF optimizer instances if isinstance(identifier, tf.train.Optimizer): return TFOptimizer(identifier) if isinstance(identifier, dict): return deserialize(identifier) elif isinstance(identifier, six.string_types): config = {'class_name': str(identifier), 'config': {}} return deserialize(config) if isinstance(identifier, Optimizer): return identifier else: raise ValueError('Could not interpret optimizer identifier: ' + str(identifier)) Keras-2.2.4/keras/objectives.py0000644000000000116100000000021213146670577016150 0ustar rooteng00000000000000"""Legacy objectives module. Only kept for backwards API compatibility. """ from __future__ import absolute_import from .losses import * Keras-2.2.4/keras/initializers.py0000644000000000116100000004014613342055016016512 0ustar rooteng00000000000000"""Built-in weight initializers. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import six from . import backend as K from .utils.generic_utils import serialize_keras_object from .utils.generic_utils import deserialize_keras_object class Initializer(object): """Initializer base class: all initializers inherit from this class. """ def __call__(self, shape, dtype=None): raise NotImplementedError def get_config(self): return {} @classmethod def from_config(cls, config): if 'dtype' in config: # Initializers saved from `tf.keras` # may contain an unused `dtype` argument. config.pop('dtype') return cls(**config) class Zeros(Initializer): """Initializer that generates tensors initialized to 0. """ def __call__(self, shape, dtype=None): return K.constant(0, shape=shape, dtype=dtype) class Ones(Initializer): """Initializer that generates tensors initialized to 1. 
""" def __call__(self, shape, dtype=None): return K.constant(1, shape=shape, dtype=dtype) class Constant(Initializer): """Initializer that generates tensors initialized to a constant value. # Arguments value: float; the value of the generator tensors. """ def __init__(self, value=0): self.value = value def __call__(self, shape, dtype=None): return K.constant(self.value, shape=shape, dtype=dtype) def get_config(self): return {'value': self.value} class RandomNormal(Initializer): """Initializer that generates tensors with a normal distribution. # Arguments mean: a python scalar or a scalar tensor. Mean of the random values to generate. stddev: a python scalar or a scalar tensor. Standard deviation of the random values to generate. seed: A Python integer. Used to seed the random generator. """ def __init__(self, mean=0., stddev=0.05, seed=None): self.mean = mean self.stddev = stddev self.seed = seed def __call__(self, shape, dtype=None): return K.random_normal(shape, self.mean, self.stddev, dtype=dtype, seed=self.seed) def get_config(self): return { 'mean': self.mean, 'stddev': self.stddev, 'seed': self.seed } class RandomUniform(Initializer): """Initializer that generates tensors with a uniform distribution. # Arguments minval: A python scalar or a scalar tensor. Lower bound of the range of random values to generate. maxval: A python scalar or a scalar tensor. Upper bound of the range of random values to generate. Defaults to 1 for float types. seed: A Python integer. Used to seed the random generator. """ def __init__(self, minval=-0.05, maxval=0.05, seed=None): self.minval = minval self.maxval = maxval self.seed = seed def __call__(self, shape, dtype=None): return K.random_uniform(shape, self.minval, self.maxval, dtype=dtype, seed=self.seed) def get_config(self): return { 'minval': self.minval, 'maxval': self.maxval, 'seed': self.seed, } class TruncatedNormal(Initializer): """Initializer that generates a truncated normal distribution. 
These values are similar to values from a `RandomNormal` except that values more than two standard deviations from the mean are discarded and re-drawn. This is the recommended initializer for neural network weights and filters. # Arguments mean: a python scalar or a scalar tensor. Mean of the random values to generate. stddev: a python scalar or a scalar tensor. Standard deviation of the random values to generate. seed: A Python integer. Used to seed the random generator. """ def __init__(self, mean=0., stddev=0.05, seed=None): self.mean = mean self.stddev = stddev self.seed = seed def __call__(self, shape, dtype=None): return K.truncated_normal(shape, self.mean, self.stddev, dtype=dtype, seed=self.seed) def get_config(self): return { 'mean': self.mean, 'stddev': self.stddev, 'seed': self.seed } class VarianceScaling(Initializer): """Initializer capable of adapting its scale to the shape of weights. With `distribution="normal"`, samples are drawn from a truncated normal distribution centered on zero, with `stddev = sqrt(scale / n)` where n is: - number of input units in the weight tensor, if mode = "fan_in" - number of output units, if mode = "fan_out" - average of the numbers of input and output units, if mode = "fan_avg" With `distribution="uniform"`, samples are drawn from a uniform distribution within [-limit, limit], with `limit = sqrt(3 * scale / n)`. # Arguments scale: Scaling factor (positive float). mode: One of "fan_in", "fan_out", "fan_avg". distribution: Random distribution to use. One of "normal", "uniform". seed: A Python integer. Used to seed the random generator. # Raises ValueError: In case of an invalid value for the "scale", mode" or "distribution" arguments. """ def __init__(self, scale=1.0, mode='fan_in', distribution='normal', seed=None): if scale <= 0.: raise ValueError('`scale` must be a positive float. 
Got:', scale) mode = mode.lower() if mode not in {'fan_in', 'fan_out', 'fan_avg'}: raise ValueError('Invalid `mode` argument: ' 'expected on of {"fan_in", "fan_out", "fan_avg"} ' 'but got', mode) distribution = distribution.lower() if distribution not in {'normal', 'uniform'}: raise ValueError('Invalid `distribution` argument: ' 'expected one of {"normal", "uniform"} ' 'but got', distribution) self.scale = scale self.mode = mode self.distribution = distribution self.seed = seed def __call__(self, shape, dtype=None): fan_in, fan_out = _compute_fans(shape) scale = self.scale if self.mode == 'fan_in': scale /= max(1., fan_in) elif self.mode == 'fan_out': scale /= max(1., fan_out) else: scale /= max(1., float(fan_in + fan_out) / 2) if self.distribution == 'normal': # 0.879... = scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) stddev = np.sqrt(scale) / .87962566103423978 return K.truncated_normal(shape, 0., stddev, dtype=dtype, seed=self.seed) else: limit = np.sqrt(3. * scale) return K.random_uniform(shape, -limit, limit, dtype=dtype, seed=self.seed) def get_config(self): return { 'scale': self.scale, 'mode': self.mode, 'distribution': self.distribution, 'seed': self.seed } class Orthogonal(Initializer): """Initializer that generates a random orthogonal matrix. # Arguments gain: Multiplicative factor to apply to the orthogonal matrix. seed: A Python integer. Used to seed the random generator. # References Saxe et al., http://arxiv.org/abs/1312.6120 """ def __init__(self, gain=1., seed=None): self.gain = gain self.seed = seed def __call__(self, shape, dtype=None): num_rows = 1 for dim in shape[:-1]: num_rows *= dim num_cols = shape[-1] flat_shape = (num_rows, num_cols) if self.seed is not None: np.random.seed(self.seed) a = np.random.normal(0.0, 1.0, flat_shape) u, _, v = np.linalg.svd(a, full_matrices=False) # Pick the one with the correct shape. 
q = u if u.shape == flat_shape else v q = q.reshape(shape) return self.gain * q[:shape[0], :shape[1]] def get_config(self): return { 'gain': self.gain, 'seed': self.seed } class Identity(Initializer): """Initializer that generates the identity matrix. Only use for 2D matrices. If the long side of the matrix is a multiple of the short side, multiple identity matrices are concatenated along the long side. # Arguments gain: Multiplicative factor to apply to the identity matrix. """ def __init__(self, gain=1.): self.gain = gain def __call__(self, shape, dtype=None): if len(shape) != 2: raise ValueError( 'Identity matrix initializer can only be used for 2D matrices.') if max(shape) % min(shape) != 0: raise ValueError('Long side should be multiple of short side.') if shape[0] == shape[1]: return self.gain * np.identity(shape[0]) elif shape[0] > shape[1]: return self.gain * np.concatenate( [np.identity(shape[1])] * (shape[0] // shape[1]), axis=0) else: return self.gain * np.concatenate( [np.identity(shape[0])] * (shape[1] // shape[0]), axis=1) def get_config(self): return { 'gain': self.gain } def lecun_uniform(seed=None): """LeCun uniform initializer. It draws samples from a uniform distribution within [-limit, limit] where `limit` is `sqrt(3 / fan_in)` where `fan_in` is the number of input units in the weight tensor. # Arguments seed: A Python integer. Used to seed the random generator. # Returns An initializer. # References LeCun 98, Efficient Backprop, http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf """ return VarianceScaling(scale=1., mode='fan_in', distribution='uniform', seed=seed) def glorot_normal(seed=None): """Glorot normal initializer, also called Xavier normal initializer. It draws samples from a truncated normal distribution centered on 0 with `stddev = sqrt(2 / (fan_in + fan_out))` where `fan_in` is the number of input units in the weight tensor and `fan_out` is the number of output units in the weight tensor. # Arguments seed: A Python integer. 
Used to seed the random generator.

    # Returns
        An initializer.

    # References
        Glorot & Bengio, AISTATS 2010
        http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
    """
    # 'fan_avg' divides the scale by (fan_in + fan_out) / 2.
    return VarianceScaling(scale=1.,
                           mode='fan_avg',
                           distribution='normal',
                           seed=seed)


def glorot_uniform(seed=None):
    """Glorot uniform initializer, also called Xavier uniform initializer.

    It draws samples from a uniform distribution within [-limit, limit]
    where `limit` is `sqrt(6 / (fan_in + fan_out))`
    where `fan_in` is the number of input units in the weight tensor
    and `fan_out` is the number of output units in the weight tensor.

    # Arguments
        seed: A Python integer. Used to seed the random generator.

    # Returns
        An initializer.

    # References
        Glorot & Bengio, AISTATS 2010
        http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
    """
    # Same scaling as `glorot_normal`, but sampled uniformly.
    return VarianceScaling(scale=1.,
                           mode='fan_avg',
                           distribution='uniform',
                           seed=seed)


def he_normal(seed=None):
    """He normal initializer.

    It draws samples from a truncated normal distribution centered on 0
    with `stddev = sqrt(2 / fan_in)`
    where `fan_in` is the number of input units in the weight tensor.

    # Arguments
        seed: A Python integer. Used to seed the random generator.

    # Returns
        An initializer.

    # References
        He et al., http://arxiv.org/abs/1502.01852
    """
    # scale=2 with 'fan_in' gives the documented variance of 2 / fan_in.
    return VarianceScaling(scale=2.,
                           mode='fan_in',
                           distribution='normal',
                           seed=seed)


def lecun_normal(seed=None):
    """LeCun normal initializer.

    It draws samples from a truncated normal distribution centered on 0
    with `stddev = sqrt(1 / fan_in)`
    where `fan_in` is the number of input units in the weight tensor.

    # Arguments
        seed: A Python integer. Used to seed the random generator.

    # Returns
        An initializer.

    # References
        - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
        - [Efficient Backprop](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf)
    """
    return VarianceScaling(scale=1.,
                           mode='fan_in',
                           distribution='normal',
                           seed=seed)


def he_uniform(seed=None):
    """He uniform variance scaling initializer.
It draws samples from a uniform distribution within [-limit, limit] where `limit` is `sqrt(6 / fan_in)` where `fan_in` is the number of input units in the weight tensor. # Arguments seed: A Python integer. Used to seed the random generator. # Returns An initializer. # References He et al., http://arxiv.org/abs/1502.01852 """ return VarianceScaling(scale=2., mode='fan_in', distribution='uniform', seed=seed) # Compatibility aliases zero = zeros = Zeros one = ones = Ones constant = Constant uniform = random_uniform = RandomUniform normal = random_normal = RandomNormal truncated_normal = TruncatedNormal identity = Identity orthogonal = Orthogonal # Utility functions def _compute_fans(shape, data_format='channels_last'): """Computes the number of input and output units for a weight shape. # Arguments shape: Integer shape tuple. data_format: Image data format to use for convolution kernels. Note that all kernels in Keras are standardized on the `channels_last` ordering (even when inputs are set to `channels_first`). # Returns A tuple of scalars, `(fan_in, fan_out)`. # Raises ValueError: in case of invalid `data_format` argument. """ if len(shape) == 2: fan_in = shape[0] fan_out = shape[1] elif len(shape) in {3, 4, 5}: # Assuming convolution kernels (1D, 2D or 3D). # TH kernel shape: (depth, input_depth, ...) # TF kernel shape: (..., input_depth, depth) if data_format == 'channels_first': receptive_field_size = np.prod(shape[2:]) fan_in = shape[1] * receptive_field_size fan_out = shape[0] * receptive_field_size elif data_format == 'channels_last': receptive_field_size = np.prod(shape[:-2]) fan_in = shape[-2] * receptive_field_size fan_out = shape[-1] * receptive_field_size else: raise ValueError('Invalid data_format: ' + data_format) else: # No specific assumptions. 
class CallbackList(object):
    """Container abstracting a list of callbacks.

    Every hook is forwarded, in order, to each contained callback.
    The batch-level hooks also time the callbacks and emit a warning
    when the median callback time exceeds both 95% of the last batch
    duration and 0.1 seconds.

    # Arguments
        callbacks: List of `Callback` instances.
        queue_length: Queue length for keeping
            running statistics over callback execution time.
    """

    def __init__(self, callbacks=None, queue_length=10):
        self.callbacks = list(callbacks or [])
        self.queue_length = queue_length
        # Create the timing state up front so the batch hooks are usable
        # even when `on_epoch_begin` has not been called yet.
        self._reset_batch_timing()

    def _reset_batch_timing(self):
        """Clears the running statistics used by the slow-callback check."""
        self._delta_t_batch = 0.
        self._delta_ts_batch_begin = deque([], maxlen=self.queue_length)
        self._delta_ts_batch_end = deque([], maxlen=self.queue_length)

    def append(self, callback):
        """Adds `callback` at the end of the list."""
        self.callbacks.append(callback)

    def set_params(self, params):
        """Forwards `params` to every callback."""
        for callback in self.callbacks:
            callback.set_params(params)

    def set_model(self, model):
        """Forwards `model` to every callback."""
        for callback in self.callbacks:
            callback.set_model(model)

    def on_epoch_begin(self, epoch, logs=None):
        """Called at the start of an epoch.

        # Arguments
            epoch: integer, index of epoch.
            logs: dictionary of logs.
        """
        logs = logs or {}
        for callback in self.callbacks:
            callback.on_epoch_begin(epoch, logs)
        # Timing statistics are kept per epoch.
        self._reset_batch_timing()

    def on_epoch_end(self, epoch, logs=None):
        """Called at the end of an epoch.

        # Arguments
            epoch: integer, index of epoch.
            logs: dictionary of logs.
        """
        logs = logs or {}
        for callback in self.callbacks:
            callback.on_epoch_end(epoch, logs)

    def on_batch_begin(self, batch, logs=None):
        """Called right before processing a batch.

        # Arguments
            batch: integer, index of batch within the current epoch.
            logs: dictionary of logs.
        """
        logs = logs or {}
        t_before_callbacks = time.time()
        for callback in self.callbacks:
            callback.on_batch_begin(batch, logs)
        self._delta_ts_batch_begin.append(time.time() - t_before_callbacks)
        delta_t_median = np.median(self._delta_ts_batch_begin)
        if (self._delta_t_batch > 0. and
                delta_t_median > 0.95 * self._delta_t_batch and
                delta_t_median > 0.1):
            warnings.warn('Method on_batch_begin() is slow compared '
                          'to the batch update (%f). Check your callbacks.'
                          % delta_t_median)
        # Start of the batch update itself, measured in `on_batch_end`.
        self._t_enter_batch = time.time()

    def on_batch_end(self, batch, logs=None):
        """Called at the end of a batch.

        # Arguments
            batch: integer, index of batch within the current epoch.
            logs: dictionary of logs.
        """
        logs = logs or {}
        # `on_batch_begin` may not have run; fall back to "now" so the
        # measured batch duration is (close to) zero rather than failing.
        if not hasattr(self, '_t_enter_batch'):
            self._t_enter_batch = time.time()
        self._delta_t_batch = time.time() - self._t_enter_batch
        t_before_callbacks = time.time()
        for callback in self.callbacks:
            callback.on_batch_end(batch, logs)
        self._delta_ts_batch_end.append(time.time() - t_before_callbacks)
        delta_t_median = np.median(self._delta_ts_batch_end)
        if (self._delta_t_batch > 0. and
                delta_t_median > 0.95 * self._delta_t_batch and
                delta_t_median > 0.1):
            warnings.warn('Method on_batch_end() is slow compared '
                          'to the batch update (%f). Check your callbacks.'
                          % delta_t_median)

    def on_train_begin(self, logs=None):
        """Called at the beginning of training.

        # Arguments
            logs: dictionary of logs.
        """
        logs = logs or {}
        for callback in self.callbacks:
            callback.on_train_begin(logs)

    def on_train_end(self, logs=None):
        """Called at the end of training.

        # Arguments
            logs: dictionary of logs.
        """
        logs = logs or {}
        for callback in self.callbacks:
            callback.on_train_end(logs)

    def __iter__(self):
        return iter(self.callbacks)
""" def __init__(self): self.validation_data = None self.model = None def set_params(self, params): self.params = params def set_model(self, model): self.model = model def on_epoch_begin(self, epoch, logs=None): pass def on_epoch_end(self, epoch, logs=None): pass def on_batch_begin(self, batch, logs=None): pass def on_batch_end(self, batch, logs=None): pass def on_train_begin(self, logs=None): pass def on_train_end(self, logs=None): pass class BaseLogger(Callback): """Callback that accumulates epoch averages of metrics. This callback is automatically applied to every Keras model. # Arguments stateful_metrics: Iterable of string names of metrics that should *not* be averaged over an epoch. Metrics in this list will be logged as-is in `on_epoch_end`. All others will be averaged in `on_epoch_end`. """ def __init__(self, stateful_metrics=None): if stateful_metrics: self.stateful_metrics = set(stateful_metrics) else: self.stateful_metrics = set() def on_epoch_begin(self, epoch, logs=None): self.seen = 0 self.totals = {} def on_batch_end(self, batch, logs=None): logs = logs or {} batch_size = logs.get('size', 0) self.seen += batch_size for k, v in logs.items(): if k in self.stateful_metrics: self.totals[k] = v else: if k in self.totals: self.totals[k] += v * batch_size else: self.totals[k] = v * batch_size def on_epoch_end(self, epoch, logs=None): if logs is not None: for k in self.params['metrics']: if k in self.totals: # Make value available to next callbacks. if k in self.stateful_metrics: logs[k] = self.totals[k] else: logs[k] = self.totals[k] / self.seen class TerminateOnNaN(Callback): """Callback that terminates training when a NaN loss is encountered. """ def on_batch_end(self, batch, logs=None): logs = logs or {} loss = logs.get('loss') if loss is not None: if np.isnan(loss) or np.isinf(loss): print('Batch %d: Invalid loss, terminating training' % (batch)) self.model.stop_training = True class ProgbarLogger(Callback): """Callback that prints metrics to stdout. 
# Arguments count_mode: One of "steps" or "samples". Whether the progress bar should count samples seen or steps (batches) seen. stateful_metrics: Iterable of string names of metrics that should *not* be averaged over an epoch. Metrics in this list will be logged as-is. All others will be averaged over time (e.g. loss, etc). # Raises ValueError: In case of invalid `count_mode`. """ def __init__(self, count_mode='samples', stateful_metrics=None): super(ProgbarLogger, self).__init__() if count_mode == 'samples': self.use_steps = False elif count_mode == 'steps': self.use_steps = True else: raise ValueError('Unknown `count_mode`: ' + str(count_mode)) if stateful_metrics: self.stateful_metrics = set(stateful_metrics) else: self.stateful_metrics = set() def on_train_begin(self, logs=None): self.verbose = self.params['verbose'] self.epochs = self.params['epochs'] def on_epoch_begin(self, epoch, logs=None): if self.verbose: print('Epoch %d/%d' % (epoch + 1, self.epochs)) if self.use_steps: target = self.params['steps'] else: target = self.params['samples'] self.target = target self.progbar = Progbar(target=self.target, verbose=self.verbose, stateful_metrics=self.stateful_metrics) self.seen = 0 def on_batch_begin(self, batch, logs=None): if self.seen < self.target: self.log_values = [] def on_batch_end(self, batch, logs=None): logs = logs or {} batch_size = logs.get('size', 0) if self.use_steps: self.seen += 1 else: self.seen += batch_size for k in self.params['metrics']: if k in logs: self.log_values.append((k, logs[k])) # Skip progbar update for the last batch; # will be handled by on_epoch_end. 
class History(Callback):
    """Callback that records events into a `History` object.

    This callback is automatically applied to
    every Keras model. The `History` object
    gets returned by the `fit` method of models.

    After training, `epoch` holds the epoch indices seen and `history`
    maps each log key to the list of values recorded for it.
    """

    def on_train_begin(self, logs=None):
        # Start every training run from an empty record.
        self.history = {}
        self.epoch = []

    def on_epoch_end(self, epoch, logs=None):
        self.epoch.append(epoch)
        if not logs:
            return
        for name, value in logs.items():
            if name not in self.history:
                self.history[name] = []
            self.history[name].append(value)
""" def __init__(self, filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=1): super(ModelCheckpoint, self).__init__() self.monitor = monitor self.verbose = verbose self.filepath = filepath self.save_best_only = save_best_only self.save_weights_only = save_weights_only self.period = period self.epochs_since_last_save = 0 if mode not in ['auto', 'min', 'max']: warnings.warn('ModelCheckpoint mode %s is unknown, ' 'fallback to auto mode.' % (mode), RuntimeWarning) mode = 'auto' if mode == 'min': self.monitor_op = np.less self.best = np.Inf elif mode == 'max': self.monitor_op = np.greater self.best = -np.Inf else: if 'acc' in self.monitor or self.monitor.startswith('fmeasure'): self.monitor_op = np.greater self.best = -np.Inf else: self.monitor_op = np.less self.best = np.Inf def on_epoch_end(self, epoch, logs=None): logs = logs or {} self.epochs_since_last_save += 1 if self.epochs_since_last_save >= self.period: self.epochs_since_last_save = 0 filepath = self.filepath.format(epoch=epoch + 1, **logs) if self.save_best_only: current = logs.get(self.monitor) if current is None: warnings.warn('Can save best model only with %s available, ' 'skipping.' 
class EarlyStopping(Callback):
    """Stop training when a monitored quantity has stopped improving.

    # Arguments
        monitor: quantity to be monitored.
        min_delta: minimum change in the monitored quantity
            to qualify as an improvement, i.e. an absolute
            change of less than min_delta, will count as no
            improvement.
        patience: number of epochs with no improvement
            after which training will be stopped.
        verbose: verbosity mode.
        mode: one of {auto, min, max}. In `min` mode,
            training will stop when the quantity
            monitored has stopped decreasing; in `max`
            mode it will stop when the quantity
            monitored has stopped increasing; in `auto`
            mode, the direction is automatically inferred
            from the name of the monitored quantity.
        baseline: Baseline value for the monitored quantity to reach.
            Training will stop if the model doesn't show improvement
            over the baseline.
        restore_best_weights: whether to restore model weights from
            the epoch with the best value of the monitored quantity.
            If False, the model weights obtained at the last step of
            training are used. Nothing is restored if no epoch ever
            improved on the starting value (e.g. the baseline).
    """

    def __init__(self,
                 monitor='val_loss',
                 min_delta=0,
                 patience=0,
                 verbose=0,
                 mode='auto',
                 baseline=None,
                 restore_best_weights=False):
        super(EarlyStopping, self).__init__()

        self.monitor = monitor
        self.baseline = baseline
        self.patience = patience
        self.verbose = verbose
        self.min_delta = min_delta
        self.wait = 0
        self.stopped_epoch = 0
        self.restore_best_weights = restore_best_weights
        self.best_weights = None

        if mode not in ['auto', 'min', 'max']:
            warnings.warn('EarlyStopping mode %s is unknown, '
                          'fallback to auto mode.' % mode,
                          RuntimeWarning)
            mode = 'auto'

        if mode == 'min':
            self.monitor_op = np.less
        elif mode == 'max':
            self.monitor_op = np.greater
        else:
            # `auto`: accuracy-like metrics are maximised, the rest minimised.
            if 'acc' in self.monitor:
                self.monitor_op = np.greater
            else:
                self.monitor_op = np.less

        # Flip the sign when minimising so that `current - min_delta`
        # can be compared against `best` with the same operator.
        if self.monitor_op == np.greater:
            self.min_delta *= 1
        else:
            self.min_delta *= -1

    def on_train_begin(self, logs=None):
        # Allow instances to be re-used
        self.wait = 0
        self.stopped_epoch = 0
        # Drop weights captured during a previous run so that they can
        # never be restored into the model of the current run.
        self.best_weights = None
        if self.baseline is not None:
            self.best = self.baseline
        else:
            # `np.inf` rather than the `np.Inf` alias, which NumPy 2.0 removed.
            self.best = np.inf if self.monitor_op == np.less else -np.inf

    def on_epoch_end(self, epoch, logs=None):
        current = self.get_monitor_value(logs)
        if current is None:
            return

        if self.monitor_op(current - self.min_delta, self.best):
            self.best = current
            self.wait = 0
            if self.restore_best_weights:
                self.best_weights = self.model.get_weights()
        else:
            self.wait += 1
            if self.wait >= self.patience:
                self.stopped_epoch = epoch
                self.model.stop_training = True
                # `best_weights` is still None when no epoch improved on
                # the initial `best`; there is nothing to restore then.
                if (self.restore_best_weights and
                        self.best_weights is not None):
                    if self.verbose > 0:
                        print('Restoring model weights from the end of '
                              'the best epoch')
                    self.model.set_weights(self.best_weights)

    def on_train_end(self, logs=None):
        if self.stopped_epoch > 0 and self.verbose > 0:
            print('Epoch %05d: early stopping' % (self.stopped_epoch + 1))

    def get_monitor_value(self, logs):
        """Returns the monitored value from `logs`, or None if missing.

        A `RuntimeWarning` listing the available metrics is emitted
        when the monitored key is absent.

        # Arguments
            logs: dictionary of logs, or None.
        """
        # `on_epoch_end` accepts `logs=None`; treat that as "no metrics".
        logs = logs or {}
        monitor_value = logs.get(self.monitor)
        if monitor_value is None:
            warnings.warn(
                'Early stopping conditioned on metric `%s` '
                'which is not available. Available metrics are: %s' %
                (self.monitor, ','.join(list(logs.keys()))), RuntimeWarning
            )
        return monitor_value
# Arguments schedule: a function that takes an epoch index as input (integer, indexed from 0) and current learning rate and returns a new learning rate as output (float). verbose: int. 0: quiet, 1: update messages. """ def __init__(self, schedule, verbose=0): super(LearningRateScheduler, self).__init__() self.schedule = schedule self.verbose = verbose def on_epoch_begin(self, epoch, logs=None): if not hasattr(self.model.optimizer, 'lr'): raise ValueError('Optimizer must have a "lr" attribute.') lr = float(K.get_value(self.model.optimizer.lr)) try: # new API lr = self.schedule(epoch, lr) except TypeError: # old API for backward compatibility lr = self.schedule(epoch) if not isinstance(lr, (float, np.float32, np.float64)): raise ValueError('The output of the "schedule" function ' 'should be float.') K.set_value(self.model.optimizer.lr, lr) if self.verbose > 0: print('\nEpoch %05d: LearningRateScheduler setting learning ' 'rate to %s.' % (epoch + 1, lr)) def on_epoch_end(self, epoch, logs=None): logs = logs or {} logs['lr'] = K.get_value(self.model.optimizer.lr) class TensorBoard(Callback): """TensorBoard basic visualizations. [TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard) is a visualization tool provided with TensorFlow. This callback writes a log for TensorBoard, which allows you to visualize dynamic graphs of your training and test metrics, as well as activation histograms for the different layers in your model. If you have installed TensorFlow with pip, you should be able to launch TensorBoard from the command line: ```sh tensorboard --logdir=/full_path_to_your_logs ``` When using a backend other than TensorFlow, TensorBoard will still work (if you have TensorFlow installed), but the only feature available will be the display of the losses and metrics plots. # Arguments log_dir: the path of the directory where to save the log files to be parsed by TensorBoard. 
histogram_freq: frequency (in epochs) at which to compute activation and weight histograms for the layers of the model. If set to 0, histograms won't be computed. Validation data (or split) must be specified for histogram visualizations. write_graph: whether to visualize the graph in TensorBoard. The log file can become quite large when write_graph is set to True. write_grads: whether to visualize gradient histograms in TensorBoard. `histogram_freq` must be greater than 0. batch_size: size of batch of inputs to feed to the network for histograms computation. write_images: whether to write model weights to visualize as image in TensorBoard. embeddings_freq: frequency (in epochs) at which selected embedding layers will be saved. If set to 0, embeddings won't be computed. Data to be visualized in TensorBoard's Embedding tab must be passed as `embeddings_data`. embeddings_layer_names: a list of names of layers to keep eye on. If None or empty list all the embedding layer will be watched. embeddings_metadata: a dictionary which maps layer name to a file name in which metadata for this embedding layer is saved. See the [details](https://www.tensorflow.org/how_tos/embedding_viz/#metadata_optional) about metadata files format. In case if the same metadata file is used for all embedding layers, string can be passed. embeddings_data: data to be embedded at layers specified in `embeddings_layer_names`. Numpy array (if the model has a single input) or list of Numpy arrays (if the model has multiple inputs). Learn [more about embeddings] (https://www.tensorflow.org/programmers_guide/embedding). update_freq: `'batch'` or `'epoch'` or integer. When using `'batch'`, writes the losses and metrics to TensorBoard after each batch. The same applies for `'epoch'`. If using an integer, let's say `10000`, the callback will write the metrics and losses to TensorBoard every 10000 samples. Note that writing too frequently to TensorBoard can slow down your training. 
""" def __init__(self, log_dir='./logs', histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None, update_freq='epoch'): super(TensorBoard, self).__init__() global tf, projector try: import tensorflow as tf from tensorflow.contrib.tensorboard.plugins import projector except ImportError: raise ImportError('You need the TensorFlow module installed to ' 'use TensorBoard.') if K.backend() != 'tensorflow': if histogram_freq != 0: warnings.warn('You are not using the TensorFlow backend. ' 'histogram_freq was set to 0') histogram_freq = 0 if write_graph: warnings.warn('You are not using the TensorFlow backend. ' 'write_graph was set to False') write_graph = False if write_images: warnings.warn('You are not using the TensorFlow backend. ' 'write_images was set to False') write_images = False if embeddings_freq != 0: warnings.warn('You are not using the TensorFlow backend. ' 'embeddings_freq was set to 0') embeddings_freq = 0 self.log_dir = log_dir self.histogram_freq = histogram_freq self.merged = None self.write_graph = write_graph self.write_grads = write_grads self.write_images = write_images self.embeddings_freq = embeddings_freq self.embeddings_layer_names = embeddings_layer_names self.embeddings_metadata = embeddings_metadata or {} self.batch_size = batch_size self.embeddings_data = embeddings_data if update_freq == 'batch': # It is the same as writing as frequently as possible. 
self.update_freq = 1 else: self.update_freq = update_freq self.samples_seen = 0 self.samples_seen_at_last_write = 0 def set_model(self, model): self.model = model if K.backend() == 'tensorflow': self.sess = K.get_session() if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf.summary.histogram(mapped_weight_name, weight) if self.write_grads: grads = model.optimizer.get_gradients(model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [ grad.values if is_indexed_slices(grad) else grad for grad in grads] tf.summary.histogram('{}_grad'.format(mapped_weight_name), grads) if self.write_images: w_img = tf.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = tf.transpose(w_img) shape = K.int_shape(w_img) w_img = tf.reshape(w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = tf.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = tf.reshape(w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = tf.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. 
continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf.summary.image(mapped_weight_name, w_img) if hasattr(layer, 'output'): if isinstance(layer.output, list): for i, output in enumerate(layer.output): tf.summary.histogram('{}_out_{}'.format(layer.name, i), output) else: tf.summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf.summary.merge_all() if self.write_graph: self.writer = tf.summary.FileWriter(self.log_dir, self.sess.graph) else: self.writer = tf.summary.FileWriter(self.log_dir) if self.embeddings_freq and self.embeddings_data is not None: self.embeddings_data = standardize_input_data(self.embeddings_data, model.input_names) embeddings_layer_names = self.embeddings_layer_names if not embeddings_layer_names: embeddings_layer_names = [layer.name for layer in self.model.layers if type(layer).__name__ == 'Embedding'] self.assign_embeddings = [] embeddings_vars = {} self.batch_id = batch_id = tf.placeholder(tf.int32) self.step = step = tf.placeholder(tf.int32) for layer in self.model.layers: if layer.name in embeddings_layer_names: embedding_input = self.model.get_layer(layer.name).output embedding_size = np.prod(embedding_input.shape[1:]) embedding_input = tf.reshape(embedding_input, (step, int(embedding_size))) shape = (self.embeddings_data[0].shape[0], int(embedding_size)) embedding = tf.Variable(tf.zeros(shape), name=layer.name + '_embedding') embeddings_vars[layer.name] = embedding batch = tf.assign(embedding[batch_id:batch_id + step], embedding_input) self.assign_embeddings.append(batch) self.saver = tf.train.Saver(list(embeddings_vars.values())) embeddings_metadata = {} if not isinstance(self.embeddings_metadata, str): embeddings_metadata = self.embeddings_metadata else: embeddings_metadata = {layer_name: self.embeddings_metadata for layer_name in embeddings_vars.keys()} config = projector.ProjectorConfig() for layer_name, tensor in embeddings_vars.items(): embedding = config.embeddings.add() 
embedding.tensor_name = tensor.name if layer_name in embeddings_metadata: embedding.metadata_path = embeddings_metadata[layer_name] projector.visualize_embeddings(self.writer, config) def on_epoch_end(self, epoch, logs=None): logs = logs or {} if not self.validation_data and self.histogram_freq: raise ValueError("If printing histograms, validation_data must be " "provided, and cannot be a generator.") if self.embeddings_data is None and self.embeddings_freq: raise ValueError("To visualize embeddings, embeddings_data must " "be provided.") if self.validation_data and self.histogram_freq: if epoch % self.histogram_freq == 0: val_data = self.validation_data tensors = (self.model.inputs + self.model.targets + self.model.sample_weights) if self.model.uses_learning_phase: tensors += [K.learning_phase()] assert len(val_data) == len(tensors) val_size = val_data[0].shape[0] i = 0 while i < val_size: step = min(self.batch_size, val_size - i) if self.model.uses_learning_phase: # do not slice the learning phase batch_val = [x[i:i + step] for x in val_data[:-1]] batch_val.append(val_data[-1]) else: batch_val = [x[i:i + step] for x in val_data] assert len(batch_val) == len(tensors) feed_dict = dict(zip(tensors, batch_val)) result = self.sess.run([self.merged], feed_dict=feed_dict) summary_str = result[0] self.writer.add_summary(summary_str, epoch) i += self.batch_size if self.embeddings_freq and self.embeddings_data is not None: if epoch % self.embeddings_freq == 0: # We need a second forward-pass here because we're passing # the `embeddings_data` explicitly. This design allows to pass # arbitrary data as `embeddings_data` and results from the fact # that we need to know the size of the `tf.Variable`s which # hold the embeddings in `set_model`. At this point, however, # the `validation_data` is not yet set. 
def on_batch_end(self, batch, logs=None):
    """Writes the batch logs once enough new samples have been seen.

    Does nothing when `update_freq` is `'epoch'`. Otherwise the batch
    size is added to `samples_seen`, and the logs are written whenever
    at least `update_freq` samples have accumulated since the last write.

    # Arguments
        batch: integer, index of batch within the current epoch.
        logs: dictionary of logs; its `size` entry is read as the
            number of samples in the batch (0 if absent).
    """
    if self.update_freq == 'epoch':
        return
    # The signature allows `logs=None`, and the other callbacks in this
    # file read the batch size with `.get('size', 0)`; do the same here
    # instead of indexing, which fails on None or a missing key.
    logs = logs or {}
    self.samples_seen += logs.get('size', 0)
    samples_seen_since = self.samples_seen - self.samples_seen_at_last_write
    if samples_seen_since >= self.update_freq:
        self._write_logs(logs, self.samples_seen)
        self.samples_seen_at_last_write = self.samples_seen
# Example ```python reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001) model.fit(X_train, Y_train, callbacks=[reduce_lr]) ``` # Arguments monitor: quantity to be monitored. factor: factor by which the learning rate will be reduced. new_lr = lr * factor patience: number of epochs with no improvement after which learning rate will be reduced. verbose: int. 0: quiet, 1: update messages. mode: one of {auto, min, max}. In `min` mode, lr will be reduced when the quantity monitored has stopped decreasing; in `max` mode it will be reduced when the quantity monitored has stopped increasing; in `auto` mode, the direction is automatically inferred from the name of the monitored quantity. min_delta: threshold for measuring the new optimum, to only focus on significant changes. cooldown: number of epochs to wait before resuming normal operation after lr has been reduced. min_lr: lower bound on the learning rate. """ def __init__(self, monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', min_delta=1e-4, cooldown=0, min_lr=0, **kwargs): super(ReduceLROnPlateau, self).__init__() self.monitor = monitor if factor >= 1.0: raise ValueError('ReduceLROnPlateau ' 'does not support a factor >= 1.0.') if 'epsilon' in kwargs: min_delta = kwargs.pop('epsilon') warnings.warn('`epsilon` argument is deprecated and ' 'will be removed, use `min_delta` instead.') self.factor = factor self.min_lr = min_lr self.min_delta = min_delta self.patience = patience self.verbose = verbose self.cooldown = cooldown self.cooldown_counter = 0 # Cooldown counter. self.wait = 0 self.best = 0 self.mode = mode self.monitor_op = None self._reset() def _reset(self): """Resets wait counter and cooldown counter. """ if self.mode not in ['auto', 'min', 'max']: warnings.warn('Learning Rate Plateau Reducing mode %s is unknown, ' 'fallback to auto mode.' 
% (self.mode), RuntimeWarning) self.mode = 'auto' if (self.mode == 'min' or (self.mode == 'auto' and 'acc' not in self.monitor)): self.monitor_op = lambda a, b: np.less(a, b - self.min_delta) self.best = np.Inf else: self.monitor_op = lambda a, b: np.greater(a, b + self.min_delta) self.best = -np.Inf self.cooldown_counter = 0 self.wait = 0 def on_train_begin(self, logs=None): self._reset() def on_epoch_end(self, epoch, logs=None): logs = logs or {} logs['lr'] = K.get_value(self.model.optimizer.lr) current = logs.get(self.monitor) if current is None: warnings.warn( 'Reduce LR on plateau conditioned on metric `%s` ' 'which is not available. Available metrics are: %s' % (self.monitor, ','.join(list(logs.keys()))), RuntimeWarning ) else: if self.in_cooldown(): self.cooldown_counter -= 1 self.wait = 0 if self.monitor_op(current, self.best): self.best = current self.wait = 0 elif not self.in_cooldown(): self.wait += 1 if self.wait >= self.patience: old_lr = float(K.get_value(self.model.optimizer.lr)) if old_lr > self.min_lr: new_lr = old_lr * self.factor new_lr = max(new_lr, self.min_lr) K.set_value(self.model.optimizer.lr, new_lr) if self.verbose > 0: print('\nEpoch %05d: ReduceLROnPlateau reducing ' 'learning rate to %s.' % (epoch + 1, new_lr)) self.cooldown_counter = self.cooldown self.wait = 0 def in_cooldown(self): return self.cooldown_counter > 0 class CSVLogger(Callback): """Callback that streams epoch results to a csv file. Supports all values that can be represented as a string, including 1D iterables such as np.ndarray. # Example ```python csv_logger = CSVLogger('training.log') model.fit(X_train, Y_train, callbacks=[csv_logger]) ``` # Arguments filename: filename of the csv file, e.g. 'run/log.csv'. separator: string used to separate elements in the csv file. append: True: append if file exists (useful for continuing training). 
False: overwrite existing file, """ def __init__(self, filename, separator=',', append=False): self.sep = separator self.filename = filename self.append = append self.writer = None self.keys = None self.append_header = True if six.PY2: self.file_flags = 'b' self._open_args = {} else: self.file_flags = '' self._open_args = {'newline': '\n'} super(CSVLogger, self).__init__() def on_train_begin(self, logs=None): if self.append: if os.path.exists(self.filename): with open(self.filename, 'r' + self.file_flags) as f: self.append_header = not bool(len(f.readline())) mode = 'a' else: mode = 'w' self.csv_file = io.open(self.filename, mode + self.file_flags, **self._open_args) def on_epoch_end(self, epoch, logs=None): logs = logs or {} def handle_value(k): is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0 if isinstance(k, six.string_types): return k elif isinstance(k, Iterable) and not is_zero_dim_ndarray: return '"[%s]"' % (', '.join(map(str, k))) else: return k if self.keys is None: self.keys = sorted(logs.keys()) if self.model.stop_training: # We set NA so that csv parsers do not fail for this last epoch. logs = dict([(k, logs[k] if k in logs else 'NA') for k in self.keys]) if not self.writer: class CustomDialect(csv.excel): delimiter = self.sep fieldnames = ['epoch'] + self.keys if six.PY2: fieldnames = [unicode(x) for x in fieldnames] self.writer = csv.DictWriter(self.csv_file, fieldnames=fieldnames, dialect=CustomDialect) if self.append_header: self.writer.writeheader() row_dict = OrderedDict({'epoch': epoch}) row_dict.update((key, handle_value(logs[key])) for key in self.keys) self.writer.writerow(row_dict) self.csv_file.flush() def on_train_end(self, logs=None): self.csv_file.close() self.writer = None class LambdaCallback(Callback): r"""Callback for creating simple, custom callbacks on-the-fly. This callback is constructed with anonymous functions that will be called at the appropriate time. 
Note that the callbacks expects positional arguments, as: - `on_epoch_begin` and `on_epoch_end` expect two positional arguments: `epoch`, `logs` - `on_batch_begin` and `on_batch_end` expect two positional arguments: `batch`, `logs` - `on_train_begin` and `on_train_end` expect one positional argument: `logs` # Arguments on_epoch_begin: called at the beginning of every epoch. on_epoch_end: called at the end of every epoch. on_batch_begin: called at the beginning of every batch. on_batch_end: called at the end of every batch. on_train_begin: called at the beginning of model training. on_train_end: called at the end of model training. # Example ```python # Print the batch number at the beginning of every batch. batch_print_callback = LambdaCallback( on_batch_begin=lambda batch,logs: print(batch)) # Stream the epoch loss to a file in JSON format. The file content # is not well-formed JSON but rather has a JSON object per line. import json json_log = open('loss_log.json', mode='wt', buffering=1) json_logging_callback = LambdaCallback( on_epoch_end=lambda epoch, logs: json_log.write( json.dumps({'epoch': epoch, 'loss': logs['loss']}) + '\n'), on_train_end=lambda logs: json_log.close() ) # Terminate some processes after having finished model training. processes = ... 
cleanup_callback = LambdaCallback( on_train_end=lambda logs: [ p.terminate() for p in processes if p.is_alive()]) model.fit(..., callbacks=[batch_print_callback, json_logging_callback, cleanup_callback]) ``` """ def __init__(self, on_epoch_begin=None, on_epoch_end=None, on_batch_begin=None, on_batch_end=None, on_train_begin=None, on_train_end=None, **kwargs): super(LambdaCallback, self).__init__() self.__dict__.update(kwargs) if on_epoch_begin is not None: self.on_epoch_begin = on_epoch_begin else: self.on_epoch_begin = lambda epoch, logs: None if on_epoch_end is not None: self.on_epoch_end = on_epoch_end else: self.on_epoch_end = lambda epoch, logs: None if on_batch_begin is not None: self.on_batch_begin = on_batch_begin else: self.on_batch_begin = lambda batch, logs: None if on_batch_end is not None: self.on_batch_end = on_batch_end else: self.on_batch_end = lambda batch, logs: None if on_train_begin is not None: self.on_train_begin = on_train_begin else: self.on_train_begin = lambda logs: None if on_train_end is not None: self.on_train_end = on_train_end else: self.on_train_end = lambda logs: None Keras-2.2.4/keras/applications/0000755000000000116100000000000013355226624016124 5ustar rooteng00000000000000Keras-2.2.4/keras/applications/inception_resnet_v2.py0000644000000000116100000000112613342055016022444 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import inception_resnet_v2 from . 
import keras_modules_injection @keras_modules_injection def InceptionResNetV2(*args, **kwargs): return inception_resnet_v2.InceptionResNetV2(*args, **kwargs) @keras_modules_injection def decode_predictions(*args, **kwargs): return inception_resnet_v2.decode_predictions(*args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return inception_resnet_v2.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/applications/imagenet_utils.py0000644000000000116100000000102313342055016021472 0ustar rooteng00000000000000"""Utilities for ImageNet data preprocessing & prediction decoding. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import imagenet_utils from . import keras_modules_injection @keras_modules_injection def decode_predictions(*args, **kwargs): return imagenet_utils.decode_predictions( *args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return imagenet_utils.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/applications/densenet.py0000644000000000116100000000137213342055016020275 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import densenet from . 
import keras_modules_injection @keras_modules_injection def DenseNet121(*args, **kwargs): return densenet.DenseNet121(*args, **kwargs) @keras_modules_injection def DenseNet169(*args, **kwargs): return densenet.DenseNet169(*args, **kwargs) @keras_modules_injection def DenseNet201(*args, **kwargs): return densenet.DenseNet201(*args, **kwargs) @keras_modules_injection def decode_predictions(*args, **kwargs): return densenet.decode_predictions(*args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return densenet.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/applications/mobilenet_v2.py0000644000000000116100000000105613342055016021054 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import mobilenet_v2 from . import keras_modules_injection @keras_modules_injection def MobileNetV2(*args, **kwargs): return mobilenet_v2.MobileNetV2(*args, **kwargs) @keras_modules_injection def decode_predictions(*args, **kwargs): return mobilenet_v2.decode_predictions(*args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return mobilenet_v2.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/applications/__init__.py0000644000000000116100000000222013342055016020220 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from .. import backend from .. import layers from .. import models from .. 
import utils import keras_applications if not hasattr(keras_applications, 'get_submodules_from_kwargs'): keras_applications.set_keras_submodules( backend=backend, layers=layers, models=models, utils=utils) def keras_modules_injection(base_fun): def wrapper(*args, **kwargs): if hasattr(keras_applications, 'get_submodules_from_kwargs'): kwargs['backend'] = backend kwargs['layers'] = layers kwargs['models'] = models kwargs['utils'] = utils return base_fun(*args, **kwargs) return wrapper from .vgg16 import VGG16 from .vgg19 import VGG19 from .resnet50 import ResNet50 from .inception_v3 import InceptionV3 from .inception_resnet_v2 import InceptionResNetV2 from .xception import Xception from .mobilenet import MobileNet from .mobilenet_v2 import MobileNetV2 from .densenet import DenseNet121, DenseNet169, DenseNet201 from .nasnet import NASNetMobile, NASNetLarge Keras-2.2.4/keras/applications/mobilenet.py0000644000000000116100000000103613342055016020443 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import mobilenet from . import keras_modules_injection @keras_modules_injection def MobileNet(*args, **kwargs): return mobilenet.MobileNet(*args, **kwargs) @keras_modules_injection def decode_predictions(*args, **kwargs): return mobilenet.decode_predictions(*args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return mobilenet.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/applications/inception_v3.py0000644000000000116100000000105613342055016021067 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import inception_v3 from . 
import keras_modules_injection @keras_modules_injection def InceptionV3(*args, **kwargs): return inception_v3.InceptionV3(*args, **kwargs) @keras_modules_injection def decode_predictions(*args, **kwargs): return inception_v3.decode_predictions(*args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return inception_v3.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/applications/mobilenetv2.py0000644000000000116100000000010013342055016020702 0ustar rooteng00000000000000# Only for backwards compatibility. from .mobilenet_v2 import * Keras-2.2.4/keras/applications/vgg16.py0000644000000000116100000000100613342055016017414 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import vgg16 from . import keras_modules_injection @keras_modules_injection def VGG16(*args, **kwargs): return vgg16.VGG16(*args, **kwargs) @keras_modules_injection def decode_predictions(*args, **kwargs): return vgg16.decode_predictions(*args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return vgg16.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/applications/vgg19.py0000644000000000116100000000100613342055016017417 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import vgg19 from . 
import keras_modules_injection @keras_modules_injection def VGG19(*args, **kwargs): return vgg19.VGG19(*args, **kwargs) @keras_modules_injection def decode_predictions(*args, **kwargs): return vgg19.decode_predictions(*args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return vgg19.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/applications/xception.py0000644000000000116100000000103013342055016020310 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import xception from . import keras_modules_injection @keras_modules_injection def Xception(*args, **kwargs): return xception.Xception(*args, **kwargs) @keras_modules_injection def decode_predictions(*args, **kwargs): return xception.decode_predictions(*args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return xception.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/applications/nasnet.py0000644000000000116100000000120413342055016017752 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import nasnet from . import keras_modules_injection @keras_modules_injection def NASNetMobile(*args, **kwargs): return nasnet.NASNetMobile(*args, **kwargs) @keras_modules_injection def NASNetLarge(*args, **kwargs): return nasnet.NASNetLarge(*args, **kwargs) @keras_modules_injection def decode_predictions(*args, **kwargs): return nasnet.decode_predictions(*args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return nasnet.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/applications/resnet50.py0000644000000000116100000000103013342055016020124 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_applications import resnet50 from . 
import keras_modules_injection @keras_modules_injection def ResNet50(*args, **kwargs): return resnet50.ResNet50(*args, **kwargs) @keras_modules_injection def decode_predictions(*args, **kwargs): return resnet50.decode_predictions(*args, **kwargs) @keras_modules_injection def preprocess_input(*args, **kwargs): return resnet50.preprocess_input(*args, **kwargs) Keras-2.2.4/keras/losses.py0000644000000000116100000001005513326715636015325 0ustar rooteng00000000000000"""Built-in loss functions. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import six from . import backend as K from .utils.generic_utils import deserialize_keras_object from .utils.generic_utils import serialize_keras_object def mean_squared_error(y_true, y_pred): return K.mean(K.square(y_pred - y_true), axis=-1) def mean_absolute_error(y_true, y_pred): return K.mean(K.abs(y_pred - y_true), axis=-1) def mean_absolute_percentage_error(y_true, y_pred): diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), None)) return 100. * K.mean(diff, axis=-1) def mean_squared_logarithmic_error(y_true, y_pred): first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.) second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.) return K.mean(K.square(first_log - second_log), axis=-1) def squared_hinge(y_true, y_pred): return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)), axis=-1) def hinge(y_true, y_pred): return K.mean(K.maximum(1. - y_true * y_pred, 0.), axis=-1) def categorical_hinge(y_true, y_pred): pos = K.sum(y_true * y_pred, axis=-1) neg = K.max((1. - y_true) * y_pred, axis=-1) return K.maximum(0., neg - pos + 1.) def logcosh(y_true, y_pred): """Logarithm of the hyperbolic cosine of the prediction error. `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and to `abs(x) - log(2)` for large `x`. 
This means that 'logcosh' works mostly like the mean squared error, but will not be so strongly affected by the occasional wildly incorrect prediction. # Arguments y_true: tensor of true targets. y_pred: tensor of predicted targets. # Returns Tensor with one scalar loss entry per sample. """ def _logcosh(x): return x + K.softplus(-2. * x) - K.log(2.) return K.mean(_logcosh(y_pred - y_true), axis=-1) def categorical_crossentropy(y_true, y_pred): return K.categorical_crossentropy(y_true, y_pred) def sparse_categorical_crossentropy(y_true, y_pred): return K.sparse_categorical_crossentropy(y_true, y_pred) def binary_crossentropy(y_true, y_pred): return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1) def kullback_leibler_divergence(y_true, y_pred): y_true = K.clip(y_true, K.epsilon(), 1) y_pred = K.clip(y_pred, K.epsilon(), 1) return K.sum(y_true * K.log(y_true / y_pred), axis=-1) def poisson(y_true, y_pred): return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()), axis=-1) def cosine_proximity(y_true, y_pred): y_true = K.l2_normalize(y_true, axis=-1) y_pred = K.l2_normalize(y_pred, axis=-1) return -K.sum(y_true * y_pred, axis=-1) # Aliases. mse = MSE = mean_squared_error mae = MAE = mean_absolute_error mape = MAPE = mean_absolute_percentage_error msle = MSLE = mean_squared_logarithmic_error kld = KLD = kullback_leibler_divergence cosine = cosine_proximity def serialize(loss): return serialize_keras_object(loss) def deserialize(name, custom_objects=None): return deserialize_keras_object(name, module_objects=globals(), custom_objects=custom_objects, printable_module_name='loss function') def get(identifier): """Get the `identifier` loss function. # Arguments identifier: None or str, name of the function. # Returns The loss function or None if `identifier` is None. # Raises ValueError if unknown identifier. 
""" if identifier is None: return None if isinstance(identifier, six.string_types): identifier = str(identifier) return deserialize(identifier) if isinstance(identifier, dict): return deserialize(identifier) elif callable(identifier): return identifier else: raise ValueError('Could not interpret ' 'loss function identifier:', identifier) Keras-2.2.4/keras/constraints.py0000644000000000116100000001404513354530144016355 0ustar rooteng00000000000000"""Constraints: functions that impose constraints on weight values. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import six from . import backend as K from .utils.generic_utils import serialize_keras_object from .utils.generic_utils import deserialize_keras_object class Constraint(object): def __call__(self, w): return w def get_config(self): return {} class MaxNorm(Constraint): """MaxNorm weight constraint. Constrains the weights incident to each hidden unit to have a norm less than or equal to a desired value. # Arguments m: the maximum norm for the incoming weights. axis: integer, axis along which to calculate weight norms. For instance, in a `Dense` layer the weight matrix has shape `(input_dim, output_dim)`, set `axis` to `0` to constrain each weight vector of length `(input_dim,)`. In a `Conv2D` layer with `data_format="channels_last"`, the weight tensor has shape `(rows, cols, input_depth, output_depth)`, set `axis` to `[0, 1, 2]` to constrain the weights of each filter tensor of size `(rows, cols, input_depth)`. 
# References - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting] (http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf) """ def __init__(self, max_value=2, axis=0): self.max_value = max_value self.axis = axis def __call__(self, w): norms = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True)) desired = K.clip(norms, 0, self.max_value) w *= (desired / (K.epsilon() + norms)) return w def get_config(self): return {'max_value': self.max_value, 'axis': self.axis} class NonNeg(Constraint): """Constrains the weights to be non-negative. """ def __call__(self, w): w *= K.cast(K.greater_equal(w, 0.), K.floatx()) return w class UnitNorm(Constraint): """Constrains the weights incident to each hidden unit to have unit norm. # Arguments axis: integer, axis along which to calculate weight norms. For instance, in a `Dense` layer the weight matrix has shape `(input_dim, output_dim)`, set `axis` to `0` to constrain each weight vector of length `(input_dim,)`. In a `Conv2D` layer with `data_format="channels_last"`, the weight tensor has shape `(rows, cols, input_depth, output_depth)`, set `axis` to `[0, 1, 2]` to constrain the weights of each filter tensor of size `(rows, cols, input_depth)`. """ def __init__(self, axis=0): self.axis = axis def __call__(self, w): return w / (K.epsilon() + K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True))) def get_config(self): return {'axis': self.axis} class MinMaxNorm(Constraint): """MinMaxNorm weight constraint. Constrains the weights incident to each hidden unit to have the norm between a lower bound and an upper bound. # Arguments min_value: the minimum norm for the incoming weights. max_value: the maximum norm for the incoming weights. rate: rate for enforcing the constraint: weights will be rescaled to yield `(1 - rate) * norm + rate * norm.clip(min_value, max_value)`. 
Effectively, this means that rate=1.0 stands for strict enforcement of the constraint, while rate<1.0 means that weights will be rescaled at each step to slowly move towards a value inside the desired interval. axis: integer, axis along which to calculate weight norms. For instance, in a `Dense` layer the weight matrix has shape `(input_dim, output_dim)`, set `axis` to `0` to constrain each weight vector of length `(input_dim,)`. In a `Conv2D` layer with `data_format="channels_last"`, the weight tensor has shape `(rows, cols, input_depth, output_depth)`, set `axis` to `[0, 1, 2]` to constrain the weights of each filter tensor of size `(rows, cols, input_depth)`. """ def __init__(self, min_value=0.0, max_value=1.0, rate=1.0, axis=0): self.min_value = min_value self.max_value = max_value self.rate = rate self.axis = axis def __call__(self, w): norms = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True)) desired = (self.rate * K.clip(norms, self.min_value, self.max_value) + (1 - self.rate) * norms) w *= (desired / (K.epsilon() + norms)) return w def get_config(self): return {'min_value': self.min_value, 'max_value': self.max_value, 'rate': self.rate, 'axis': self.axis} # Aliases. max_norm = MaxNorm non_neg = NonNeg unit_norm = UnitNorm min_max_norm = MinMaxNorm # Legacy aliases. 
maxnorm = max_norm nonneg = non_neg unitnorm = unit_norm def serialize(constraint): return serialize_keras_object(constraint) def deserialize(config, custom_objects=None): return deserialize_keras_object(config, module_objects=globals(), custom_objects=custom_objects, printable_module_name='constraint') def get(identifier): if identifier is None: return None if isinstance(identifier, dict): return deserialize(identifier) elif isinstance(identifier, six.string_types): config = {'class_name': str(identifier), 'config': {}} return deserialize(config) elif callable(identifier): return identifier else: raise ValueError('Could not interpret constraint identifier: ' + str(identifier)) Keras-2.2.4/keras/preprocessing/0000755000000000116100000000000013355226624016321 5ustar rooteng00000000000000Keras-2.2.4/keras/preprocessing/sequence.py0000644000000000116100000000605013342055016020473 0ustar rooteng00000000000000"""Utilities for preprocessing sequence data. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras_preprocessing import sequence from .. import utils pad_sequences = sequence.pad_sequences make_sampling_table = sequence.make_sampling_table skipgrams = sequence.skipgrams _remove_long_seq = sequence._remove_long_seq # TODO: make it public? class TimeseriesGenerator(sequence.TimeseriesGenerator, utils.Sequence): """Utility class for generating batches of temporal data. This class takes in a sequence of data-points gathered at equal intervals, along with time series parameters such as stride, length of history, etc., to produce batches for training/validation. # Arguments data: Indexable generator (such as list or Numpy array) containing consecutive data points (timesteps). The data should be at 2D, and axis 0 is expected to be the time dimension. targets: Targets corresponding to timesteps in `data`. It should have same length as `data`. 
length: Length of the output sequences (in number of timesteps). sampling_rate: Period between successive individual timesteps within sequences. For rate `r`, timesteps `data[i]`, `data[i-r]`, ... `data[i - length]` are used for create a sample sequence. stride: Period between successive output sequences. For stride `s`, consecutive output samples would be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc. start_index: Data points earlier than `start_index` will not be used in the output sequences. This is useful to reserve part of the data for test or validation. end_index: Data points later than `end_index` will not be used in the output sequences. This is useful to reserve part of the data for test or validation. shuffle: Whether to shuffle output samples, or instead draw them in chronological order. reverse: Boolean: if `true`, timesteps in each output sample will be in reverse chronological order. batch_size: Number of timeseries samples in each batch (except maybe the last one). # Returns A [Sequence](/utils/#sequence) instance. # Examples ```python from keras.preprocessing.sequence import TimeseriesGenerator import numpy as np data = np.array([[i] for i in range(50)]) targets = np.array([[i] for i in range(50)]) data_gen = TimeseriesGenerator(data, targets, length=10, sampling_rate=2, batch_size=2) assert len(data_gen) == 20 batch_0 = data_gen[0] x, y = batch_0 assert np.array_equal(x, np.array([[[0], [2], [4], [6], [8]], [[1], [3], [5], [7], [9]]])) assert np.array_equal(y, np.array([[10], [11]])) ``` """ pass Keras-2.2.4/keras/preprocessing/__init__.py0000644000000000116100000000047513305602621020426 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import division from __future__ import print_function from .. import backend from .. import utils import keras_preprocessing keras_preprocessing.set_keras_submodules(backend=backend, utils=utils) from . import image from . import sequence from . 
def array_to_img(x, data_format=None, scale=True, dtype=None):
    # Thin wrapper around `keras_preprocessing.image.array_to_img` that
    # fills in Keras backend defaults:
    #   - `data_format` defaults to `backend.image_data_format()`;
    #   - `dtype` defaults to `backend.floatx()`, but is only forwarded
    #     when the installed `keras_preprocessing` accepts a `dtype`
    #     argument.
    if data_format is None:
        data_format = backend.image_data_format()
    # `inspect.getargspec` is deprecated on Python 3 (and removed in 3.11);
    # prefer `getfullargspec` and only fall back to it on Python 2.
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    if 'dtype' in getargspec(image.array_to_img).args:
        if dtype is None:
            dtype = backend.floatx()
        return image.array_to_img(x,
                                  data_format=data_format,
                                  scale=scale,
                                  dtype=dtype)
    return image.array_to_img(x,
                              data_format=data_format,
                              scale=scale)


def img_to_array(img, data_format=None, dtype=None):
    # Thin wrapper around `keras_preprocessing.image.img_to_array` that
    # fills in Keras backend defaults:
    #   - `data_format` defaults to `backend.image_data_format()`;
    #   - `dtype` defaults to `backend.floatx()`, but is only forwarded
    #     when the installed `keras_preprocessing` accepts a `dtype`
    #     argument.
    if data_format is None:
        data_format = backend.image_data_format()
    # `inspect.getargspec` is deprecated on Python 3 (and removed in 3.11);
    # prefer `getfullargspec` and only fall back to it on Python 2.
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    if 'dtype' in getargspec(image.img_to_array).args:
        if dtype is None:
            dtype = backend.floatx()
        return image.img_to_array(img, data_format=data_format, dtype=dtype)
    return image.img_to_array(img, data_format=data_format)
class DirectoryIterator(image.DirectoryIterator, Iterator):
    """Iterator capable of reading images from a directory on disk.

    # Arguments
        directory: Path to the directory to read images from.
            Each subdirectory in this directory will be
            considered to contain images from one class,
            or alternatively you could specify class subdirectories
            via the `classes` argument.
        image_data_generator: Instance of `ImageDataGenerator`
            to use for random transformations and normalization.
        target_size: tuple of integers, dimensions to resize input images to.
        color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`.
            Color mode to read images.
        classes: Optional list of strings, names of subdirectories
            containing images from each class (e.g. `["dogs", "cats"]`).
            It will be computed automatically if not set.
        class_mode: Mode for yielding the targets:
            `"binary"`: binary targets (if there are only two classes),
            `"categorical"`: categorical targets,
            `"sparse"`: integer targets,
            `"input"`: targets are images identical to input images (mainly
                used to work with autoencoders),
            `None`: no targets get yielded (only input images are yielded).
        batch_size: Integer, size of a batch.
        shuffle: Boolean, whether to shuffle the data between epochs.
        seed: Random seed for data shuffling.
        data_format: String, one of `channels_first`, `channels_last`.
            Defaults to `backend.image_data_format()` when `None`.
        save_to_dir: Optional directory where to save the pictures
            being yielded, in a viewable format. This is useful
            for visualizing the random transformations being
            applied, for debugging purposes.
        save_prefix: String prefix to use for saving sample
            images (if `save_to_dir` is set).
        save_format: Format to use for saving sample images
            (if `save_to_dir` is set).
        follow_links: Boolean, forwarded unchanged to the parent
            `keras_preprocessing` iterator (presumably whether to follow
            symlinks inside class subdirectories; confirm against the
            `keras_preprocessing` documentation).
        subset: Subset of data (`"training"` or `"validation"`) if
            validation_split is set in ImageDataGenerator.
        interpolation: Interpolation method used to resample the image if the
            target size is different from that of the loaded image.
            Supported methods are "nearest", "bilinear", and "bicubic".
            If PIL version 1.1.3 or newer is installed, "lanczos" is also
            supported. If PIL version 3.4.0 or newer is installed, "box" and
            "hamming" are also supported. By default, "nearest" is used.
        dtype: Dtype to use for generated arrays. Defaults to
            `backend.floatx()` when `None`; only forwarded if the installed
            `keras_preprocessing` iterator accepts it.
    """

    def __init__(self, directory, image_data_generator,
                 target_size=(256, 256),
                 color_mode='rgb',
                 classes=None,
                 class_mode='categorical',
                 batch_size=32,
                 shuffle=True,
                 seed=None,
                 data_format=None,
                 save_to_dir=None,
                 save_prefix='',
                 save_format='png',
                 follow_links=False,
                 subset=None,
                 interpolation='nearest',
                 dtype=None):
        if data_format is None:
            data_format = backend.image_data_format()
        kwargs = {}
        # `inspect.getargspec` is deprecated on Python 3 (and removed in
        # 3.11); prefer `getfullargspec` and fall back only on Python 2.
        getargspec = (getattr(inspect, 'getfullargspec', None) or
                      inspect.getargspec)
        # Probe the constructor that actually receives `dtype` below
        # (the parent `DirectoryIterator`), not `ImageDataGenerator`.
        if 'dtype' in getargspec(image.DirectoryIterator.__init__).args:
            if dtype is None:
                dtype = backend.floatx()
            kwargs['dtype'] = dtype
        super(DirectoryIterator, self).__init__(
            directory, image_data_generator,
            target_size=target_size,
            color_mode=color_mode,
            classes=classes,
            class_mode=class_mode,
            batch_size=batch_size,
            shuffle=shuffle,
            seed=seed,
            data_format=data_format,
            save_to_dir=save_to_dir,
            save_prefix=save_prefix,
            save_format=save_format,
            follow_links=follow_links,
            subset=subset,
            interpolation=interpolation,
            **kwargs)
class ImageDataGenerator(image.ImageDataGenerator):
    """Generate batches of tensor image data with real-time data augmentation.
     The data will be looped over (in batches).

    # Arguments
        featurewise_center: Boolean.
            Set input mean to 0 over the dataset, feature-wise.
        samplewise_center: Boolean. Set each sample mean to 0.
        featurewise_std_normalization: Boolean.
            Divide inputs by std of the dataset, feature-wise.
        samplewise_std_normalization: Boolean. Divide each input by its std.
        zca_epsilon: epsilon for ZCA whitening. Default is 1e-6.
        zca_whitening: Boolean. Apply ZCA whitening.
        rotation_range: Int. Degree range for random rotations.
        width_shift_range: Float, 1-D array-like or int
            - float: fraction of total width, if < 1, or pixels if >= 1.
            - 1-D array-like: random elements from the array.
            - int: integer number of pixels from interval
                `(-width_shift_range, +width_shift_range)`
            - With `width_shift_range=2` possible values
                are integers `[-1, 0, +1]`,
                same as with `width_shift_range=[-1, 0, +1]`,
                while with `width_shift_range=1.0` possible values are floats
                in the interval [-1.0, +1.0).
        height_shift_range: Float, 1-D array-like or int
            - float: fraction of total height, if < 1, or pixels if >= 1.
            - 1-D array-like: random elements from the array.
            - int: integer number of pixels from interval
                `(-height_shift_range, +height_shift_range)`
            - With `height_shift_range=2` possible values
                are integers `[-1, 0, +1]`,
                same as with `height_shift_range=[-1, 0, +1]`,
                while with `height_shift_range=1.0` possible values are floats
                in the interval [-1.0, +1.0).
        brightness_range: Tuple or list of two floats. Range for picking
            a brightness shift value from.
        shear_range: Float. Shear Intensity
            (Shear angle in counter-clockwise direction in degrees)
        zoom_range: Float or [lower, upper]. Range for random zoom.
            If a float, `[lower, upper] = [1-zoom_range, 1+zoom_range]`.
        channel_shift_range: Float. Range for random channel shifts.
        fill_mode: One of {"constant", "nearest", "reflect" or "wrap"}.
            Default is 'nearest'.
            Points outside the boundaries of the input are filled
            according to the given mode:
            - 'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k)
            - 'nearest':  aaaaaaaa|abcd|dddddddd
            - 'reflect':  abcddcba|abcd|dcbaabcd
            - 'wrap':  abcdabcd|abcd|abcdabcd
        cval: Float or Int.
            Value used for points outside the boundaries
            when `fill_mode = "constant"`.
        horizontal_flip: Boolean. Randomly flip inputs horizontally.
        vertical_flip: Boolean. Randomly flip inputs vertically.
        rescale: rescaling factor. Defaults to None.
            If None or 0, no rescaling is applied,
            otherwise we multiply the data by the value provided
            (after applying all other transformations).
        preprocessing_function: function that will be applied to each input.
            The function will run after the image is resized and augmented.
            The function should take one argument:
            one image (Numpy tensor with rank 3),
            and should output a Numpy tensor with the same shape.
        data_format: Image data format,
            either "channels_first" or "channels_last".
            "channels_last" mode means that the images should have shape
            `(samples, height, width, channels)`,
            "channels_first" mode means that the images should have shape
            `(samples, channels, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "channels_last".
        validation_split: Float. Fraction of images reserved for validation
            (strictly between 0 and 1).
        dtype: Dtype to use for the generated arrays. Defaults to
            `backend.floatx()` when `None`; only forwarded if the installed
            `keras_preprocessing` generator accepts it.

    # Examples
    Example of using `.flow(x, y)`:

    ```python
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    y_train = np_utils.to_categorical(y_train, num_classes)
    y_test = np_utils.to_categorical(y_test, num_classes)

    datagen = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(x_train)

    # fits the model on batches with real-time data augmentation:
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=32),
                        steps_per_epoch=len(x_train) / 32, epochs=epochs)

    # here's a more "manual" example
    for e in range(epochs):
        print('Epoch', e)
        batches = 0
        for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=32):
            model.fit(x_batch, y_batch)
            batches += 1
            if batches >= len(x_train) / 32:
                # we need to break the loop by hand because
                # the generator loops indefinitely
                break
    ```
    Example of using `.flow_from_directory(directory)`:

    ```python
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

    test_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
        'data/train',
        target_size=(150, 150),
        batch_size=32,
        class_mode='binary')

    validation_generator = test_datagen.flow_from_directory(
        'data/validation',
        target_size=(150, 150),
        batch_size=32,
        class_mode='binary')

    model.fit_generator(
        train_generator,
        steps_per_epoch=2000,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=800)
    ```

    Example of transforming images and masks together.

    ```python
    # we create two instances with the same arguments
    data_gen_args = dict(featurewise_center=True,
                         featurewise_std_normalization=True,
                         rotation_range=90,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         zoom_range=0.2)
    image_datagen = ImageDataGenerator(**data_gen_args)
    mask_datagen = ImageDataGenerator(**data_gen_args)

    # Provide the same seed and keyword arguments to the fit and flow methods
    seed = 1
    image_datagen.fit(images, augment=True, seed=seed)
    mask_datagen.fit(masks, augment=True, seed=seed)

    image_generator = image_datagen.flow_from_directory(
        'data/images',
        class_mode=None,
        seed=seed)

    mask_generator = mask_datagen.flow_from_directory(
        'data/masks',
        class_mode=None,
        seed=seed)

    # combine generators into one which yields image and masks
    train_generator = zip(image_generator, mask_generator)

    model.fit_generator(
        train_generator,
        steps_per_epoch=2000,
        epochs=50)
    ```
    """

    def __init__(self,
                 featurewise_center=False,
                 samplewise_center=False,
                 featurewise_std_normalization=False,
                 samplewise_std_normalization=False,
                 zca_whitening=False,
                 zca_epsilon=1e-6,
                 rotation_range=0,
                 width_shift_range=0.,
                 height_shift_range=0.,
                 brightness_range=None,
                 shear_range=0.,
                 zoom_range=0.,
                 channel_shift_range=0.,
                 fill_mode='nearest',
                 cval=0.,
                 horizontal_flip=False,
                 vertical_flip=False,
                 rescale=None,
                 preprocessing_function=None,
                 data_format=None,
                 validation_split=0.0,
                 dtype=None):
        if data_format is None:
            data_format = backend.image_data_format()
        kwargs = {}
        # `inspect.getargspec` is deprecated on Python 3 (and removed in
        # 3.11); prefer `getfullargspec` and fall back only on Python 2.
        getargspec = (getattr(inspect, 'getfullargspec', None) or
                      inspect.getargspec)
        # Only forward `dtype` when the installed `keras_preprocessing`
        # generator declares that argument.
        if 'dtype' in getargspec(image.ImageDataGenerator.__init__).args:
            if dtype is None:
                dtype = backend.floatx()
            kwargs['dtype'] = dtype
        super(ImageDataGenerator, self).__init__(
            featurewise_center=featurewise_center,
            samplewise_center=samplewise_center,
            featurewise_std_normalization=featurewise_std_normalization,
            samplewise_std_normalization=samplewise_std_normalization,
            zca_whitening=zca_whitening,
            zca_epsilon=zca_epsilon,
            rotation_range=rotation_range,
            width_shift_range=width_shift_range,
            height_shift_range=height_shift_range,
            brightness_range=brightness_range,
            shear_range=shear_range,
            zoom_range=zoom_range,
            channel_shift_range=channel_shift_range,
            fill_mode=fill_mode,
            cval=cval,
            horizontal_flip=horizontal_flip,
            vertical_flip=vertical_flip,
            rescale=rescale,
            preprocessing_function=preprocessing_function,
            data_format=data_format,
            validation_split=validation_split,
            **kwargs)
import callbacks as cbks def fit_generator(model, generator, steps_per_epoch=None, epochs=1, verbose=1, callbacks=None, validation_data=None, validation_steps=None, class_weight=None, max_queue_size=10, workers=1, use_multiprocessing=False, shuffle=True, initial_epoch=0): """See docstring for `Model.fit_generator`.""" wait_time = 0.01 # in seconds epoch = initial_epoch do_validation = bool(validation_data) model._make_train_function() if do_validation: model._make_test_function() is_sequence = isinstance(generator, Sequence) if not is_sequence and use_multiprocessing and workers > 1: warnings.warn( UserWarning('Using a generator with `use_multiprocessing=True`' ' and multiple workers may duplicate your data.' ' Please consider using the`keras.utils.Sequence' ' class.')) if steps_per_epoch is None: if is_sequence: steps_per_epoch = len(generator) else: raise ValueError('`steps_per_epoch=None` is only valid for a' ' generator based on the ' '`keras.utils.Sequence`' ' class. Please specify `steps_per_epoch` ' 'or use the `keras.utils.Sequence` class.') # python 2 has 'next', 3 has '__next__' # avoid any explicit version checks val_gen = (hasattr(validation_data, 'next') or hasattr(validation_data, '__next__') or isinstance(validation_data, Sequence)) if (val_gen and not isinstance(validation_data, Sequence) and not validation_steps): raise ValueError('`validation_steps=None` is only valid for a' ' generator based on the `keras.utils.Sequence`' ' class. Please specify `validation_steps` or use' ' the `keras.utils.Sequence` class.') # Prepare display labels. 
out_labels = model.metrics_names callback_metrics = out_labels + ['val_' + n for n in out_labels] # prepare callbacks model.history = cbks.History() _callbacks = [cbks.BaseLogger( stateful_metrics=model.stateful_metric_names)] if verbose: _callbacks.append( cbks.ProgbarLogger( count_mode='steps', stateful_metrics=model.stateful_metric_names)) _callbacks += (callbacks or []) + [model.history] callbacks = cbks.CallbackList(_callbacks) # it's possible to callback a different model than self: if hasattr(model, 'callback_model') and model.callback_model: callback_model = model.callback_model else: callback_model = model callbacks.set_model(callback_model) callbacks.set_params({ 'epochs': epochs, 'steps': steps_per_epoch, 'verbose': verbose, 'do_validation': do_validation, 'metrics': callback_metrics, }) callbacks.on_train_begin() enqueuer = None val_enqueuer = None try: if do_validation: if val_gen and workers > 0: # Create an Enqueuer that can be reused val_data = validation_data if isinstance(val_data, Sequence): val_enqueuer = OrderedEnqueuer( val_data, use_multiprocessing=use_multiprocessing) validation_steps = validation_steps or len(val_data) else: val_enqueuer = GeneratorEnqueuer( val_data, use_multiprocessing=use_multiprocessing) val_enqueuer.start(workers=workers, max_queue_size=max_queue_size) val_enqueuer_gen = val_enqueuer.get() elif val_gen: val_data = validation_data if isinstance(val_data, Sequence): val_enqueuer_gen = iter_sequence_infinite(val_data) validation_steps = validation_steps or len(val_data) else: val_enqueuer_gen = val_data else: # Prepare data for validation if len(validation_data) == 2: val_x, val_y = validation_data val_sample_weight = None elif len(validation_data) == 3: val_x, val_y, val_sample_weight = validation_data else: raise ValueError('`validation_data` should be a tuple ' '`(val_x, val_y, val_sample_weight)` ' 'or `(val_x, val_y)`. 
Found: ' + str(validation_data)) val_x, val_y, val_sample_weights = model._standardize_user_data( val_x, val_y, val_sample_weight) val_data = val_x + val_y + val_sample_weights if model.uses_learning_phase and not isinstance(K.learning_phase(), int): val_data += [0.] for cbk in callbacks: cbk.validation_data = val_data if workers > 0: if is_sequence: enqueuer = OrderedEnqueuer( generator, use_multiprocessing=use_multiprocessing, shuffle=shuffle) else: enqueuer = GeneratorEnqueuer( generator, use_multiprocessing=use_multiprocessing, wait_time=wait_time) enqueuer.start(workers=workers, max_queue_size=max_queue_size) output_generator = enqueuer.get() else: if is_sequence: output_generator = iter_sequence_infinite(generator) else: output_generator = generator callback_model.stop_training = False # Construct epoch logs. epoch_logs = {} while epoch < epochs: for m in model.stateful_metric_functions: m.reset_states() callbacks.on_epoch_begin(epoch) steps_done = 0 batch_index = 0 while steps_done < steps_per_epoch: generator_output = next(output_generator) if not hasattr(generator_output, '__len__'): raise ValueError('Output of generator should be ' 'a tuple `(x, y, sample_weight)` ' 'or `(x, y)`. Found: ' + str(generator_output)) if len(generator_output) == 2: x, y = generator_output sample_weight = None elif len(generator_output) == 3: x, y, sample_weight = generator_output else: raise ValueError('Output of generator should be ' 'a tuple `(x, y, sample_weight)` ' 'or `(x, y)`. 
Found: ' + str(generator_output)) # build batch logs batch_logs = {} if x is None or len(x) == 0: # Handle data tensors support when no input given # step-size = 1 for data tensors batch_size = 1 elif isinstance(x, list): batch_size = x[0].shape[0] elif isinstance(x, dict): batch_size = list(x.values())[0].shape[0] else: batch_size = x.shape[0] batch_logs['batch'] = batch_index batch_logs['size'] = batch_size callbacks.on_batch_begin(batch_index, batch_logs) outs = model.train_on_batch(x, y, sample_weight=sample_weight, class_weight=class_weight) outs = to_list(outs) for l, o in zip(out_labels, outs): batch_logs[l] = o callbacks.on_batch_end(batch_index, batch_logs) batch_index += 1 steps_done += 1 # Epoch finished. if steps_done >= steps_per_epoch and do_validation: if val_gen: val_outs = model.evaluate_generator( val_enqueuer_gen, validation_steps, workers=0) else: # No need for try/except because # data has already been validated. val_outs = model.evaluate( val_x, val_y, batch_size=batch_size, sample_weight=val_sample_weights, verbose=0) val_outs = to_list(val_outs) # Same labels assumed. 
def evaluate_generator(model, generator,
                       steps=None,
                       max_queue_size=10,
                       workers=1,
                       use_multiprocessing=False,
                       verbose=0):
    """See docstring for `Model.evaluate_generator`.

    Pulls `steps` batches from `generator`, runs `model.test_on_batch`
    on each one and aggregates the per-batch outputs.

    # Arguments
        model: The model to evaluate.
        generator: A generator or `Sequence` yielding tuples
            `(x, y)` or `(x, y, sample_weight)`.
        steps: Number of batches to draw. If `None`, `len(generator)`
            is used, which is only possible for a `Sequence`.
        max_queue_size: Forwarded to the enqueuer's `start`.
        workers: Number of workers forwarded to the enqueuer. With `0`
            no enqueuer is created and the generator is consumed
            directly in the calling code.
        use_multiprocessing: Forwarded to the enqueuer.
        verbose: If `1`, a `Progbar` is updated after every batch.

    # Returns
        The aggregated outputs passed through `unpack_singleton`.
        Outputs whose name is in `model.stateful_metric_names` take the
        value from the last batch; every other output is the average
        over batches weighted by batch size.

    # Raises
        ValueError: if `steps` is `None` for a non-`Sequence` generator,
            if the generator yields something that is not a 2- or
            3-element tuple, if a batch is empty, or if no batch at all
            was evaluated.
    """
    model._make_test_function()

    if hasattr(model, 'metrics'):
        for m in model.stateful_metric_functions:
            m.reset_states()
        stateful_metric_indices = [
            i for i, name in enumerate(model.metrics_names)
            if str(name) in model.stateful_metric_names]
    else:
        stateful_metric_indices = []

    steps_done = 0
    wait_time = 0.01
    outs_per_batch = []
    batch_sizes = []
    is_sequence = isinstance(generator, Sequence)
    if not is_sequence and use_multiprocessing and workers > 1:
        warnings.warn(
            UserWarning('Using a generator with `use_multiprocessing=True`'
                        ' and multiple workers may duplicate your data.'
                        ' Please consider using the`keras.utils.Sequence'
                        ' class.'))
    if steps is None:
        if is_sequence:
            steps = len(generator)
        else:
            raise ValueError('`steps=None` is only valid for a generator'
                             ' based on the `keras.utils.Sequence` class.'
                             ' Please specify `steps` or use the'
                             ' `keras.utils.Sequence` class.')
    enqueuer = None

    try:
        if workers > 0:
            if is_sequence:
                enqueuer = OrderedEnqueuer(
                    generator,
                    use_multiprocessing=use_multiprocessing)
            else:
                enqueuer = GeneratorEnqueuer(
                    generator,
                    use_multiprocessing=use_multiprocessing,
                    wait_time=wait_time)
            enqueuer.start(workers=workers, max_queue_size=max_queue_size)
            output_generator = enqueuer.get()
        else:
            if is_sequence:
                output_generator = iter_sequence_infinite(generator)
            else:
                output_generator = generator

        if verbose == 1:
            progbar = Progbar(target=steps)

        while steps_done < steps:
            generator_output = next(output_generator)
            if not hasattr(generator_output, '__len__'):
                raise ValueError('Output of generator should be a tuple '
                                 '(x, y, sample_weight) '
                                 'or (x, y). Found: ' +
                                 str(generator_output))
            if len(generator_output) == 2:
                x, y = generator_output
                sample_weight = None
            elif len(generator_output) == 3:
                x, y, sample_weight = generator_output
            else:
                raise ValueError('Output of generator should be a tuple '
                                 '(x, y, sample_weight) '
                                 'or (x, y). Found: ' +
                                 str(generator_output))
            outs = model.test_on_batch(x, y, sample_weight=sample_weight)
            outs = to_list(outs)
            outs_per_batch.append(outs)

            if x is None or len(x) == 0:
                # Handle data tensors support when no input given
                # step-size = 1 for data tensors
                batch_size = 1
            elif isinstance(x, list):
                batch_size = x[0].shape[0]
            elif isinstance(x, dict):
                batch_size = list(x.values())[0].shape[0]
            else:
                batch_size = x.shape[0]
            if batch_size == 0:
                raise ValueError('Received an empty batch. '
                                 'Batches should contain '
                                 'at least one item.')
            steps_done += 1
            batch_sizes.append(batch_size)
            if verbose == 1:
                progbar.update(steps_done)

    finally:
        # Always stop the workers, even if a batch raised.
        if enqueuer is not None:
            enqueuer.stop()

    # With zero steps the loop above never binds `outs`; fail with an
    # explicit message instead of an UnboundLocalError further down.
    if not outs_per_batch:
        raise ValueError('Cannot evaluate on zero batches. `steps` (or the '
                         'length of the `keras.utils.Sequence`) must be '
                         'at least 1. Found: ' + str(steps))

    averages = []
    for i in range(len(outs_per_batch[-1])):
        if i not in stateful_metric_indices:
            averages.append(np.average([out[i] for out in outs_per_batch],
                                       weights=batch_sizes))
        else:
            # Stateful metrics already accumulate across batches,
            # so only the last reported value is kept.
            averages.append(np.float64(outs_per_batch[-1][i]))
    return unpack_singleton(averages)
if len(generator_output) == 2: x, _ = generator_output elif len(generator_output) == 3: x, _, _ = generator_output else: raise ValueError('Output of generator should be ' 'a tuple `(x, y, sample_weight)` ' 'or `(x, y)`. Found: ' + str(generator_output)) else: # Assumes a generator that only # yields inputs (not targets and sample weights). x = generator_output outs = model.predict_on_batch(x) outs = to_list(outs) if not all_outs: for out in outs: all_outs.append([]) for i, out in enumerate(outs): all_outs[i].append(out) steps_done += 1 if verbose == 1: progbar.update(steps_done) finally: if enqueuer is not None: enqueuer.stop() if len(all_outs) == 1: if steps_done == 1: return all_outs[0][0] else: return np.concatenate(all_outs[0]) if steps_done == 1: return [out[0] for out in all_outs] else: return [np.concatenate(out) for out in all_outs] Keras-2.2.4/keras/engine/base_layer.py0000644000000000116100000015111413326715636017372 0ustar rooteng00000000000000"""Contains the base Layer class, from which all layers inherit. """ from __future__ import print_function from __future__ import absolute_import from __future__ import division import copy import re from six.moves import zip from .. import backend as K from .. import initializers from ..utils.layer_utils import count_params from ..utils.generic_utils import has_arg from ..utils.generic_utils import object_list_uid from ..utils.generic_utils import to_list from ..utils.generic_utils import unpack_singleton from ..utils.generic_utils import is_all_none from ..legacy import interfaces class Layer(object): """Abstract base layer class. # Properties input, output: Input/output tensor(s). Note that if the layer is used more than once (shared layer), this is ill-defined and will raise an exception. In such cases, use `layer.get_input_at(node_index)`. input_mask, output_mask: Mask tensors. Same caveats apply as input, output. input_shape: Shape tuple. 
Provided for convenience, but note that there may be cases in which this attribute is ill-defined (e.g. a shared layer with multiple input shapes), in which case requesting `input_shape` will raise an Exception. Prefer using `layer.get_input_shape_at(node_index)`. input_spec: List of InputSpec class instances each entry describes one required input: - ndim - dtype A layer with `n` input tensors must have an `input_spec` of length `n`. name: String, must be unique within a model. non_trainable_weights: List of variables. output_shape: Shape tuple. See `input_shape`. stateful: Boolean indicating whether the layer carries additional non-weight state. Used in, for instance, RNN cells to carry information between batches. supports_masking: Boolean indicator of whether the layer supports masking, typically for unused timesteps in a sequence. trainable: Boolean, whether the layer weights will be updated during training. trainable_weights: List of variables. uses_learning_phase: Whether any operation of the layer uses `K.in_training_phase()` or `K.in_test_phase()`. weights: The concatenation of the lists trainable_weights and non_trainable_weights (in this order). # Methods call(x, mask=None): Where the layer's logic lives. __call__(x, mask=None): Wrapper around the layer logic (`call`). 
If x is a Keras tensor: - Connect current layer with last layer from tensor: `self._add_inbound_node(last_layer)` - Add layer to tensor history If layer is not built: - Build from x._keras_shape compute_mask(x, mask) compute_output_shape(input_shape) count_params() get_config() get_input_at(node_index) get_input_mask_at(node_index) get_input_shape_at(node_index) get_output_at(node_index) get_output_mask_at(node_index) get_output_shape_at(node_index) get_weights() set_weights(weights) # Class Methods from_config(config) # Internal methods: _add_inbound_node(layer, index=0) assert_input_compatibility() build(input_shape) """ def __init__(self, **kwargs): self.input_spec = None self.supports_masking = False self.stateful = False # These properties will be set upon call of self.build() self._trainable_weights = [] self._non_trainable_weights = [] self._losses = [] self._updates = [] self._per_input_losses = {} self._per_input_updates = {} self._built = False # These lists will be filled via successive calls # to self._add_inbound_node(). self._inbound_nodes = [] self._outbound_nodes = [] # These properties should be set by the user via keyword arguments. # note that 'dtype', 'input_shape' and 'batch_input_shape' # are only applicable to input layers: do not pass these keywords # to non-input layers. 
allowed_kwargs = {'input_shape', 'batch_input_shape', 'batch_size', 'dtype', 'name', 'trainable', 'weights', 'input_dtype', # legacy } for kwarg in kwargs: if kwarg not in allowed_kwargs: raise TypeError('Keyword argument not understood:', kwarg) name = kwargs.get('name') if not name: prefix = self.__class__.__name__ name = _to_snake_case(prefix) + '_' + str(K.get_uid(prefix)) self.name = name self.trainable = kwargs.get('trainable', True) if 'input_shape' in kwargs or 'batch_input_shape' in kwargs: # In this case we will later create an input layer # to insert before the current layer if 'batch_input_shape' in kwargs: batch_input_shape = tuple(kwargs['batch_input_shape']) elif 'input_shape' in kwargs: if 'batch_size' in kwargs: batch_size = kwargs['batch_size'] else: batch_size = None batch_input_shape = ( batch_size,) + tuple(kwargs['input_shape']) self.batch_input_shape = batch_input_shape # Set dtype. dtype = kwargs.get('dtype') if dtype is None: dtype = kwargs.get('input_dtype') if dtype is None: dtype = K.floatx() self.dtype = dtype if 'weights' in kwargs: self._initial_weights = kwargs['weights'] else: self._initial_weights = None @staticmethod def _node_key(layer, node_index): """Converts a layer and its index to a unique (immutable type) name. This function is used internally with `self._network_nodes`. # Arguments layer: The layer. node_index: The layer's position (e.g. via enumerate) in a list of nodes. # Returns The unique name. 
""" return layer.name + '_ib-' + str(node_index) @property def losses(self): return self._losses @property def updates(self): if not self.trainable and not self.stateful: return [] return self._updates @property def built(self): return self._built @built.setter def built(self, value): self._built = value @property def trainable_weights(self): trainable = getattr(self, 'trainable', True) if trainable: return self._trainable_weights else: return [] @trainable_weights.setter def trainable_weights(self, weights): self._trainable_weights = weights @property def non_trainable_weights(self): trainable = getattr(self, 'trainable', True) if not trainable: return self._trainable_weights + self._non_trainable_weights else: return self._non_trainable_weights @non_trainable_weights.setter def non_trainable_weights(self, weights): self._non_trainable_weights = weights @interfaces.legacy_add_weight_support def add_weight(self, name, shape, dtype=None, initializer=None, regularizer=None, trainable=True, constraint=None): """Adds a weight variable to the layer. # Arguments name: String, the name for the weight variable. shape: The shape tuple of the weight. dtype: The dtype of the weight. initializer: An Initializer instance (callable). regularizer: An optional Regularizer instance. trainable: A boolean, whether the weight should be trained via backprop or not (assuming that the layer itself is also trainable). constraint: An optional Constraint instance. # Returns The created weight variable. 
""" initializer = initializers.get(initializer) if dtype is None: dtype = K.floatx() weight = K.variable(initializer(shape), dtype=dtype, name=name, constraint=constraint) if regularizer is not None: with K.name_scope('weight_regularizer'): self.add_loss(regularizer(weight)) if trainable: self._trainable_weights.append(weight) else: self._non_trainable_weights.append(weight) return weight def assert_input_compatibility(self, inputs): """Checks compatibility between the layer and provided inputs. This checks that the tensor(s) `input` verify the input assumptions of the layer (if any). If not, exceptions are raised. # Arguments inputs: input tensor or list of input tensors. # Raises ValueError: in case of mismatch between the provided inputs and the expectations of the layer. """ inputs = to_list(inputs) for x in inputs: try: K.is_keras_tensor(x) except ValueError: raise ValueError('Layer ' + self.name + ' was called with ' 'an input that isn\'t a symbolic tensor. ' 'Received type: ' + str(type(x)) + '. Full input: ' + str(inputs) + '. All inputs to the layer ' 'should be tensors.') if not self.input_spec: return if not isinstance(self.input_spec, (list, tuple)): input_spec = to_list(self.input_spec) else: input_spec = self.input_spec if len(inputs) != len(input_spec): raise ValueError('Layer ' + self.name + ' expects ' + str(len(input_spec)) + ' inputs, ' 'but it received ' + str(len(inputs)) + ' input tensors. Input received: ' + str(inputs)) for input_index, (x, spec) in enumerate(zip(inputs, input_spec)): if spec is None: continue # Check ndim. 
if spec.ndim is not None: if K.ndim(x) != spec.ndim: raise ValueError('Input ' + str(input_index) + ' is incompatible with layer ' + self.name + ': expected ndim=' + str(spec.ndim) + ', found ndim=' + str(K.ndim(x))) if spec.max_ndim is not None: ndim = K.ndim(x) if ndim is not None and ndim > spec.max_ndim: raise ValueError('Input ' + str(input_index) + ' is incompatible with layer ' + self.name + ': expected max_ndim=' + str(spec.max_ndim) + ', found ndim=' + str(K.ndim(x))) if spec.min_ndim is not None: ndim = K.ndim(x) if ndim is not None and ndim < spec.min_ndim: raise ValueError('Input ' + str(input_index) + ' is incompatible with layer ' + self.name + ': expected min_ndim=' + str(spec.min_ndim) + ', found ndim=' + str(K.ndim(x))) # Check dtype. if spec.dtype is not None: if K.dtype(x) != spec.dtype: raise ValueError('Input ' + str(input_index) + ' is incompatible with layer ' + self.name + ': expected dtype=' + str(spec.dtype) + ', found dtype=' + str(K.dtype(x))) # Check specific shape axes. if spec.axes: try: x_shape = K.int_shape(x) except TypeError: x_shape = None if x_shape is not None: for axis, value in spec.axes.items(): if (value is not None and x_shape[int(axis)] not in {value, None}): raise ValueError( 'Input ' + str(input_index) + ' is incompatible with layer ' + self.name + ': expected axis ' + str(axis) + ' of input shape to have ' 'value ' + str(value) + ' but got shape ' + str(x_shape)) # Check shape. if spec.shape is not None: try: x_shape = K.int_shape(x) except TypeError: x_shape = None if x_shape is not None: for spec_dim, dim in zip(spec.shape, x_shape): if spec_dim is not None and dim is not None: if spec_dim != dim: raise ValueError( 'Input ' + str(input_index) + ' is incompatible with layer ' + self.name + ': expected shape=' + str(spec.shape) + ', found shape=' + str(x_shape)) def call(self, inputs, **kwargs): """This is where the layer's logic lives. # Arguments inputs: Input tensor, or list/tuple of input tensors. 
**kwargs: Additional keyword arguments. # Returns A tensor or list/tuple of tensors. """ return inputs def __call__(self, inputs, **kwargs): """Wrapper around self.call(), for handling internal references. If a Keras tensor is passed: - We call self._add_inbound_node(). - If necessary, we `build` the layer to match the _keras_shape of the input(s). - We update the _keras_shape of every input tensor with its new shape (obtained via self.compute_output_shape). This is done as part of _add_inbound_node(). - We update the _keras_history of the output tensor(s) with the current layer. This is done as part of _add_inbound_node(). # Arguments inputs: Can be a tensor or list/tuple of tensors. **kwargs: Additional keyword arguments to be passed to `call()`. # Returns Output of the layer's `call` method. # Raises ValueError: in case the layer is missing shape information for its `build` call. """ if isinstance(inputs, list): inputs = inputs[:] with K.name_scope(self.name): # Handle laying building (weight creating, input spec locking). if not self.built: # Raise exceptions in case the input is not compatible # with the input_spec specified in the layer constructor. self.assert_input_compatibility(inputs) # Collect input shapes to build layer. input_shapes = [] for x_elem in to_list(inputs): if hasattr(x_elem, '_keras_shape'): input_shapes.append(x_elem._keras_shape) elif hasattr(K, 'int_shape'): input_shapes.append(K.int_shape(x_elem)) else: raise ValueError('You tried to call layer "' + self.name + '". This layer has no information' ' about its expected input shape, ' 'and thus cannot be built. ' 'You can build it manually via: ' '`layer.build(batch_input_shape)`') self.build(unpack_singleton(input_shapes)) self.built = True # Load weights that were specified at layer instantiation. if self._initial_weights is not None: self.set_weights(self._initial_weights) # Raise exceptions in case the input is not compatible # with the input_spec set at build time. 
self.assert_input_compatibility(inputs) # Handle mask propagation. previous_mask = _collect_previous_mask(inputs) user_kwargs = copy.copy(kwargs) if not is_all_none(previous_mask): # The previous layer generated a mask. if has_arg(self.call, 'mask'): if 'mask' not in kwargs: # If mask is explicitly passed to __call__, # we should override the default mask. kwargs['mask'] = previous_mask # Handle automatic shape inference (only useful for Theano). input_shape = _collect_input_shape(inputs) # Actually call the layer, # collecting output(s), mask(s), and shape(s). output = self.call(inputs, **kwargs) output_mask = self.compute_mask(inputs, previous_mask) # If the layer returns tensors from its inputs, unmodified, # we copy them to avoid loss of tensor metadata. output_ls = to_list(output) inputs_ls = to_list(inputs) output_ls_copy = [] for x in output_ls: if x in inputs_ls: x = K.identity(x) output_ls_copy.append(x) output = unpack_singleton(output_ls_copy) # Inferring the output shape is only relevant for Theano. if all([s is not None for s in to_list(input_shape)]): output_shape = self.compute_output_shape(input_shape) else: if isinstance(input_shape, list): output_shape = [None for _ in input_shape] else: output_shape = None if (not isinstance(output_mask, (list, tuple)) and len(output_ls) > 1): # Augment the mask to match the length of the output. output_mask = [output_mask] * len(output_ls) # Add an inbound node to the layer, so that it keeps track # of the call and of all new variables created during the call. # This also updates the layer history of the output tensor(s). # If the input tensor(s) had not previous Keras history, # this does nothing. 
self._add_inbound_node(input_tensors=inputs, output_tensors=output, input_masks=previous_mask, output_masks=output_mask, input_shapes=input_shape, output_shapes=output_shape, arguments=user_kwargs) # Apply activity regularizer if any: if (hasattr(self, 'activity_regularizer') and self.activity_regularizer is not None): with K.name_scope('activity_regularizer'): regularization_losses = [ self.activity_regularizer(x) for x in to_list(output)] self.add_loss(regularization_losses, inputs=to_list(inputs)) return output def _add_inbound_node(self, input_tensors, output_tensors, input_masks, output_masks, input_shapes, output_shapes, arguments=None): """Internal method to create an inbound node for the layer. # Arguments input_tensors: list of input tensors. output_tensors: list of output tensors. input_masks: list of input masks (a mask can be a tensor, or None). output_masks: list of output masks (a mask can be a tensor, or None). input_shapes: list of input shape tuples. output_shapes: list of output shape tuples. arguments: dictionary of keyword arguments that were passed to the `call` method of the layer at the call that created the node. """ input_tensors = to_list(input_tensors) output_tensors = to_list(output_tensors) input_masks = to_list(input_masks) output_masks = to_list(output_masks) input_shapes = to_list(input_shapes) output_shapes = to_list(output_shapes) # Collect input tensor(s) coordinates. inbound_layers = [] node_indices = [] tensor_indices = [] for x in input_tensors: if hasattr(x, '_keras_history'): inbound_layer, node_index, tensor_index = x._keras_history inbound_layers.append(inbound_layer) node_indices.append(node_index) tensor_indices.append(tensor_index) else: inbound_layers.append(None) node_indices.append(None) tensor_indices.append(None) # Create node, add it to inbound nodes. 
Node( self, inbound_layers=inbound_layers, node_indices=node_indices, tensor_indices=tensor_indices, input_tensors=input_tensors, output_tensors=output_tensors, input_masks=input_masks, output_masks=output_masks, input_shapes=input_shapes, output_shapes=output_shapes, arguments=arguments ) # Update tensor history, _keras_shape and _uses_learning_phase. for i in range(len(output_tensors)): output_tensors[i]._keras_shape = output_shapes[i] uses_lp = any( [getattr(x, '_uses_learning_phase', False) for x in input_tensors]) uses_lp = getattr(self, 'uses_learning_phase', False) or uses_lp output_tensors[i]._uses_learning_phase = getattr( output_tensors[i], '_uses_learning_phase', False) or uses_lp output_tensors[i]._keras_history = (self, len(self._inbound_nodes) - 1, i) def compute_output_shape(self, input_shape): """Computes the output shape of the layer. Assumes that the layer will be built to match that input shape provided. # Arguments input_shape: Shape tuple (tuple of integers) or list of shape tuples (one per output tensor of the layer). Shape tuples can include None for free dimensions, instead of an integer. # Returns An input shape tuple. """ return input_shape def compute_mask(self, inputs, mask=None): """Computes an output mask tensor. # Arguments inputs: Tensor or list of tensors. mask: Tensor or list of tensors. # Returns None or a tensor (or list of tensors, one per output tensor of the layer). """ if not self.supports_masking: if mask is not None: if isinstance(mask, list): if any(m is not None for m in mask): raise TypeError('Layer ' + self.name + ' does not support masking, ' 'but was passed an input_mask: ' + str(mask)) else: raise TypeError('Layer ' + self.name + ' does not support masking, ' 'but was passed an input_mask: ' + str(mask)) # masking not explicitly supported: return None as mask return None # if masking is explicitly supported, by default # carry over the input mask return mask def build(self, input_shape): """Creates the layer weights. 
Must be implemented on all layers that have weights. # Arguments input_shape: Keras tensor (future input to layer) or list/tuple of Keras tensors to reference for weight shape computations. """ self.built = True def _get_node_attribute_at_index(self, node_index, attr, attr_name): """Retrieves an attribute (e.g. input_tensors) from a node. This is used to implement the methods: - get_input_shape_at - get_output_shape_at - get_input_at etc... # Arguments node_index: Integer index of the node from which to retrieve the attribute. attr: Exact node attribute name. attr_name: Human-readable attribute name, for error messages. # Returns The layer's attribute `attr` at the node of index `node_index`. # Raises RuntimeError: If the layer has no inbound nodes. ValueError: If the index is does not match any node. """ if not self._inbound_nodes: raise RuntimeError('The layer has never been called ' 'and thus has no defined ' + attr_name + '.') if not len(self._inbound_nodes) > node_index: raise ValueError('Asked to get ' + attr_name + ' at node ' + str(node_index) + ', but the layer has only ' + str(len(self._inbound_nodes)) + ' inbound nodes.') values = getattr(self._inbound_nodes[node_index], attr) return unpack_singleton(values) def get_input_shape_at(self, node_index): """Retrieves the input shape(s) of a layer at a given node. # Arguments node_index: Integer, index of the node from which to retrieve the attribute. E.g. `node_index=0` will correspond to the first time the layer was called. # Returns A shape tuple (or list of shape tuples if the layer has multiple inputs). """ return self._get_node_attribute_at_index(node_index, 'input_shapes', 'input shape') def get_output_shape_at(self, node_index): """Retrieves the output shape(s) of a layer at a given node. # Arguments node_index: Integer, index of the node from which to retrieve the attribute. E.g. `node_index=0` will correspond to the first time the layer was called. 
# Returns A shape tuple (or list of shape tuples if the layer has multiple outputs). """ return self._get_node_attribute_at_index(node_index, 'output_shapes', 'output shape') def get_input_at(self, node_index): """Retrieves the input tensor(s) of a layer at a given node. # Arguments node_index: Integer, index of the node from which to retrieve the attribute. E.g. `node_index=0` will correspond to the first time the layer was called. # Returns A tensor (or list of tensors if the layer has multiple inputs). """ return self._get_node_attribute_at_index(node_index, 'input_tensors', 'input') def get_output_at(self, node_index): """Retrieves the output tensor(s) of a layer at a given node. # Arguments node_index: Integer, index of the node from which to retrieve the attribute. E.g. `node_index=0` will correspond to the first time the layer was called. # Returns A tensor (or list of tensors if the layer has multiple outputs). """ return self._get_node_attribute_at_index(node_index, 'output_tensors', 'output') def get_input_mask_at(self, node_index): """Retrieves the input mask tensor(s) of a layer at a given node. # Arguments node_index: Integer, index of the node from which to retrieve the attribute. E.g. `node_index=0` will correspond to the first time the layer was called. # Returns A mask tensor (or list of tensors if the layer has multiple inputs). """ return self._get_node_attribute_at_index(node_index, 'input_masks', 'input mask') def get_output_mask_at(self, node_index): """Retrieves the output mask tensor(s) of a layer at a given node. # Arguments node_index: Integer, index of the node from which to retrieve the attribute. E.g. `node_index=0` will correspond to the first time the layer was called. # Returns A mask tensor (or list of tensors if the layer has multiple outputs). """ return self._get_node_attribute_at_index(node_index, 'output_masks', 'output mask') @property def input(self): """Retrieves the input tensor(s) of a layer. 
Only applicable if the layer has exactly one inbound node, i.e. if it is connected to one incoming layer. # Returns Input tensor or list of input tensors. # Raises AttributeError: if the layer is connected to more than one incoming layers. """ if len(self._inbound_nodes) > 1: raise AttributeError('Layer ' + self.name + ' has multiple inbound nodes, ' 'hence the notion of "layer input" ' 'is ill-defined. ' 'Use `get_input_at(node_index)` instead.') elif not self._inbound_nodes: raise AttributeError('Layer ' + self.name + ' is not connected, no input to return.') return self._get_node_attribute_at_index(0, 'input_tensors', 'input') @property def output(self): """Retrieves the output tensor(s) of a layer. Only applicable if the layer has exactly one inbound node, i.e. if it is connected to one incoming layer. # Returns Output tensor or list of output tensors. # Raises AttributeError: if the layer is connected to more than one incoming layers. """ if not self._inbound_nodes: raise AttributeError('Layer ' + self.name + ' has no inbound nodes.') if len(self._inbound_nodes) > 1: raise AttributeError('Layer ' + self.name + ' has multiple inbound nodes, ' 'hence the notion of "layer output" ' 'is ill-defined. ' 'Use `get_output_at(node_index)` instead.') return self._get_node_attribute_at_index(0, 'output_tensors', 'output') @property def input_mask(self): """Retrieves the input mask tensor(s) of a layer. Only applicable if the layer has exactly one inbound node, i.e. if it is connected to one incoming layer. # Returns Input mask tensor (potentially None) or list of input mask tensors. # Raises AttributeError: if the layer is connected to more than one incoming layers. """ if len(self._inbound_nodes) != 1: raise AttributeError('Layer ' + self.name + ' has multiple inbound nodes, ' + 'hence the notion of "layer input mask" ' 'is ill-defined. 
' 'Use `get_input_mask_at(node_index)` ' 'instead.') return self._get_node_attribute_at_index(0, 'input_masks', 'input mask') @property def output_mask(self): """Retrieves the output mask tensor(s) of a layer. Only applicable if the layer has exactly one inbound node, i.e. if it is connected to one incoming layer. # Returns Output mask tensor (potentially None) or list of output mask tensors. # Raises AttributeError: if the layer is connected to more than one incoming layers. """ if len(self._inbound_nodes) != 1: raise AttributeError('Layer ' + self.name + ' has multiple inbound nodes, ' 'hence the notion of "layer output mask" ' 'is ill-defined. ' 'Use `get_output_mask_at(node_index)` ' 'instead.') return self._get_node_attribute_at_index(0, 'output_masks', 'output mask') @property def input_shape(self): """Retrieves the input shape tuple(s) of a layer. Only applicable if the layer has exactly one inbound node, i.e. if it is connected to one incoming layer. # Returns Input shape tuple (or list of input shape tuples, one tuple per input tensor). # Raises AttributeError: if the layer is connected to more than one incoming layers. """ if not self._inbound_nodes: raise AttributeError('The layer has never been called ' 'and thus has no defined input shape.') all_input_shapes = set( [str(node.input_shapes) for node in self._inbound_nodes]) if len(all_input_shapes) == 1: input_shapes = self._inbound_nodes[0].input_shapes return unpack_singleton(input_shapes) else: raise AttributeError('The layer "' + str(self.name) + ' has multiple inbound nodes, ' 'with different input shapes. Hence ' 'the notion of "input shape" is ' 'ill-defined for the layer. ' 'Use `get_input_shape_at(node_index)` ' 'instead.') @property def output_shape(self): """Retrieves the output shape tuple(s) of a layer. Only applicable if the layer has one inbound node, or if all inbound nodes have the same output shape. 
# Returns Output shape tuple (or list of input shape tuples, one tuple per output tensor). # Raises AttributeError: if the layer is connected to more than one incoming layers. """ if not self._inbound_nodes: raise AttributeError('The layer has never been called ' 'and thus has no defined output shape.') all_output_shapes = set( [str(node.output_shapes) for node in self._inbound_nodes]) if len(all_output_shapes) == 1: output_shapes = self._inbound_nodes[0].output_shapes return unpack_singleton(output_shapes) else: raise AttributeError('The layer "' + str(self.name) + ' has multiple inbound nodes, ' 'with different output shapes. Hence ' 'the notion of "output shape" is ' 'ill-defined for the layer. ' 'Use `get_output_shape_at(node_index)` ' 'instead.') def add_loss(self, losses, inputs=None): """Adds losses to the layer. The loss may potentially be conditional on some inputs tensors, for instance activity losses are conditional on the layer's inputs. # Arguments losses: loss tensor or list of loss tensors to add to the layer. inputs: input tensor or list of inputs tensors to mark the losses as conditional on these inputs. If None is passed, the loss is assumed unconditional (e.g. L2 weight regularization, which only depends on the layer's weights variables, not on any inputs tensors). """ if losses is None or losses == []: return # Update self.losses losses = to_list(losses) if hasattr(self, '_losses'): self._losses += losses # Update self._per_input_updates if isinstance(inputs, list) and inputs == []: inputs = None if inputs is not None: inputs_hash = object_list_uid(inputs) else: # Updates indexed by None are unconditional # rather than input-dependent inputs_hash = None if inputs_hash not in self._per_input_losses: self._per_input_losses[inputs_hash] = [] self._per_input_losses[inputs_hash] += losses def add_update(self, updates, inputs=None): """Adds updates to the layer. 
The updates may potentially be conditional on some inputs tensors, for instance batch norm updates are conditional on the layer's inputs. # Arguments updates: update op or list of update ops to add to the layer. inputs: input tensor or list of inputs tensors to mark the updates as conditional on these inputs. If None is passed, the updates are assumed unconditional. """ if updates is None or updates == []: return # Update self.updates updates = to_list(updates) if hasattr(self, '_updates'): self._updates += updates # Update self._per_input_updates if isinstance(inputs, list) and inputs == []: inputs = None if inputs is not None: inputs_hash = object_list_uid(inputs) else: # Updates indexed by None are unconditional # rather than input-dependent inputs_hash = None if inputs_hash not in self._per_input_updates: self._per_input_updates[inputs_hash] = [] self._per_input_updates[inputs_hash] += updates def get_updates_for(self, inputs): if not self.trainable and not self.stateful: return [] if inputs is not None: inputs_hash = object_list_uid(inputs) else: inputs_hash = None if inputs_hash in self._per_input_updates: return self._per_input_updates[inputs_hash] return [] def get_losses_for(self, inputs): if inputs is not None: inputs_hash = object_list_uid(inputs) else: inputs_hash = None if inputs_hash in self._per_input_losses: return self._per_input_losses[inputs_hash] return [] @property def weights(self): return self.trainable_weights + self.non_trainable_weights def set_weights(self, weights): """Sets the weights of the layer, from Numpy arrays. # Arguments weights: a list of Numpy arrays. The number of arrays and their shape must match number of the dimensions of the weights of the layer (i.e. it should match the output of `get_weights`). # Raises ValueError: If the provided weights list does not match the layer's specifications. 
""" params = self.weights if len(params) != len(weights): raise ValueError('You called `set_weights(weights)` on layer "' + self.name + '" with a weight list of length ' + str(len(weights)) + ', but the layer was expecting ' + str(len(params)) + ' weights. Provided weights: ' + str(weights)[:50] + '...') if not params: return weight_value_tuples = [] param_values = K.batch_get_value(params) for pv, p, w in zip(param_values, params, weights): if pv.shape != w.shape: raise ValueError('Layer weight shape ' + str(pv.shape) + ' not compatible with ' 'provided weight shape ' + str(w.shape)) weight_value_tuples.append((p, w)) K.batch_set_value(weight_value_tuples) def get_weights(self): """Returns the current weights of the layer. # Returns Weights values as a list of numpy arrays. """ params = self.weights return K.batch_get_value(params) def get_config(self): """Returns the config of the layer. A layer config is a Python dictionary (serializable) containing the configuration of a layer. The same layer can be reinstantiated later (without its trained weights) from this configuration. The config of a layer does not include connectivity information, nor the layer class name. These are handled by `Network` (one layer of abstraction above). # Returns Python dictionary. """ config = {'name': self.name, 'trainable': self.trainable} if hasattr(self, 'batch_input_shape'): config['batch_input_shape'] = self.batch_input_shape if hasattr(self, 'dtype'): config['dtype'] = self.dtype return config @classmethod def from_config(cls, config): """Creates a layer from its config. This method is the reverse of `get_config`, capable of instantiating the same layer from the config dictionary. It does not handle layer connectivity (handled by Network), nor weights (handled by `set_weights`). # Arguments config: A Python dictionary, typically the output of get_config. # Returns A layer instance. 
""" return cls(**config) def count_params(self): """Counts the total number of scalars composing the weights. # Returns An integer count. # Raises RuntimeError: if the layer isn't yet built (in which case its weights aren't yet defined). """ if not self.built: if self.__class__.__name__ == 'Sequential': self.build() else: raise RuntimeError('You tried to call `count_params` on ' + self.name + ', but the layer isn\'t built. ' 'You can build it manually via: `' + self.name + '.build(batch_input_shape)`.') return count_params(self.weights) class InputSpec(object): """Specifies the ndim, dtype and shape of every input to a layer. Every layer should expose (if appropriate) an `input_spec` attribute: a list of instances of InputSpec (one per input tensor). A None entry in a shape is compatible with any dimension, a None shape is compatible with any shape. # Arguments dtype: Expected datatype of the input. shape: Shape tuple, expected shape of the input (may include None for unchecked axes). ndim: Integer, expected rank of the input. max_ndim: Integer, maximum rank of the input. min_ndim: Integer, minimum rank of the input. axes: Dictionary mapping integer axes to a specific dimension value. """ def __init__(self, dtype=None, shape=None, ndim=None, max_ndim=None, min_ndim=None, axes=None): self.dtype = dtype self.shape = shape if shape is not None: self.ndim = len(shape) else: self.ndim = ndim self.max_ndim = max_ndim self.min_ndim = min_ndim self.axes = axes or {} def __repr__(self): spec = [('dtype=' + str(self.dtype)) if self.dtype else '', ('shape=' + str(self.shape)) if self.shape else '', ('ndim=' + str(self.ndim)) if self.ndim else '', ('max_ndim=' + str(self.max_ndim)) if self.max_ndim else '', ('min_ndim=' + str(self.min_ndim)) if self.min_ndim else '', ('axes=' + str(self.axes)) if self.axes else ''] return 'InputSpec(%s)' % ', '.join(x for x in spec if x) class Node(object): """A `Node` describes the connectivity between two layers. 
Each time a layer is connected to some new input, a node is added to `layer._inbound_nodes`. Each time the output of a layer is used by another layer, a node is added to `layer._outbound_nodes`. # Arguments outbound_layer: the layer that takes `input_tensors` and turns them into `output_tensors` (the node gets created when the `call` method of the layer was called). inbound_layers: a list of layers, the same length as `input_tensors`, the layers from where `input_tensors` originate. node_indices: a list of integers, the same length as `inbound_layers`. `node_indices[i]` is the origin node of `input_tensors[i]` (necessary since each inbound layer might have several nodes, e.g. if the layer is being shared with a different data stream). tensor_indices: a list of integers, the same length as `inbound_layers`. `tensor_indices[i]` is the index of `input_tensors[i]` within the output of the inbound layer (necessary since each inbound layer might have multiple tensor outputs, with each one being independently manipulable). input_tensors: list of input tensors. output_tensors: list of output tensors. input_masks: list of input masks (a mask can be a tensor, or None). output_masks: list of output masks (a mask can be a tensor, or None). input_shapes: list of input shape tuples. output_shapes: list of output shape tuples. arguments: dictionary of keyword arguments that were passed to the `call` method of the layer at the call that created the node. 
`node_indices` and `tensor_indices` are basically fine-grained coordinates describing the origin of the `input_tensors`, verifying the following: origin_node = inbound_layers[i]._inbound_nodes[node_indices[i]] input_tensors[i] == origin_node.output_tensors[tensor_indices[i]] A node from layer A to layer B is added to: A._outbound_nodes B._inbound_nodes """ def __init__(self, outbound_layer, inbound_layers, node_indices, tensor_indices, input_tensors, output_tensors, input_masks, output_masks, input_shapes, output_shapes, arguments=None): # Layer instance (NOT a list). # this is the layer that takes a list of input tensors # and turns them into a list of output tensors. # the current node will be added to # the inbound_nodes of outbound_layer. self.outbound_layer = outbound_layer # The following 3 properties describe where # the input tensors come from: which layers, # and for each layer, which node and which # tensor output of each node. # List of layer instances. self.inbound_layers = inbound_layers # List of integers, 1:1 mapping with inbound_layers. self.node_indices = node_indices # List of integers, 1:1 mapping with inbound_layers. self.tensor_indices = tensor_indices # Following 2 properties: # tensor inputs and outputs of outbound_layer. # List of tensors. 1:1 mapping with inbound_layers. self.input_tensors = input_tensors # List of tensors, created by outbound_layer.call(). self.output_tensors = output_tensors # Following 2 properties: input and output masks. # List of tensors, 1:1 mapping with input_tensor. self.input_masks = input_masks # List of tensors, created by outbound_layer.compute_mask(). self.output_masks = output_masks # Following 2 properties: input and output shapes. # List of shape tuples, shapes of input_tensors. self.input_shapes = input_shapes # List of shape tuples, shapes of output_tensors. self.output_shapes = output_shapes # Optional keyword arguments to layer's `call`. self.arguments = arguments # Add nodes to all layers involved. 
for layer in inbound_layers: if layer is not None: layer._outbound_nodes.append(self) outbound_layer._inbound_nodes.append(self) def get_config(self): inbound_names = [] for layer in self.inbound_layers: if layer: inbound_names.append(layer.name) else: inbound_names.append(None) if self.outbound_layer: outbound_layer = self.outbound_layer.name else: outbound_layer = None return {'outbound_layer': outbound_layer, 'inbound_layers': inbound_names, 'node_indices': self.node_indices, 'tensor_indices': self.tensor_indices} def _collect_previous_mask(input_tensors): """Retrieves the output mask(s) of the previous node. # Arguments input_tensors: A tensor or list of tensors. # Returns A mask tensor or list of mask tensors. """ input_tensors = to_list(input_tensors) masks = [] for x in input_tensors: if hasattr(x, '_keras_history'): inbound_layer, node_index, tensor_index = x._keras_history node = inbound_layer._inbound_nodes[node_index] mask = node.output_masks[tensor_index] masks.append(mask) else: masks.append(None) return unpack_singleton(masks) def _to_snake_case(name): intermediate = re.sub('(.)([A-Z][a-z0-9]+)', r'\1_\2', name) insecure = re.sub('([a-z])([A-Z])', r'\1_\2', intermediate).lower() # If the class is private the name starts with "_" which is not secure # for creating scopes. We prefix the name with "private" in this case. if insecure[0] != '_': return insecure return 'private' + insecure def _collect_input_shape(input_tensors): """Collects the output shape(s) of a list of Keras tensors. # Arguments input_tensors: list of input tensors (or single input tensor). # Returns List of shape tuples (or single tuple), one tuple per input. """ input_tensors = to_list(input_tensors) shapes = [] for x in input_tensors: try: shapes.append(K.int_shape(x)) except TypeError: shapes.append(None) return unpack_singleton(shapes) Keras-2.2.4/keras/engine/saving.py0000644000000000116100000013366713354530144016556 0ustar rooteng00000000000000"""Model saving utilities. 
def _serialize_model(model, f, include_optimizer=True):
    """Model serialization logic.

    This method is used for both writing to HDF5 file/group,
    as well as pickling. This is achieved via a
    `keras.utils.io_utils.h5dict` object, which can wrap HDF5
    files, groups and dicts with a common API.

    # Arguments
        model: Keras model instance to be serialized.
        f: `keras.utils.io_utils.h5dict` instance.
        include_optimizer: If True, serialize optimizer's state together.
    """
    def get_json_type(obj):
        """Serialize any object to a JSON-serializable structure.

        # Arguments
            obj: the object to serialize

        # Returns
            JSON-serializable structure representing `obj`.

        # Raises
            TypeError: if `obj` cannot be serialized.
        """
        # if obj is a serializable Keras class instance
        # e.g. optimizer, layer
        if hasattr(obj, 'get_config'):
            return {'class_name': obj.__class__.__name__,
                    'config': obj.get_config()}

        # if obj is any numpy type
        if type(obj).__module__ == np.__name__:
            if isinstance(obj, np.ndarray):
                return obj.tolist()
            else:
                return obj.item()

        # misc functions (e.g. loss function)
        if callable(obj):
            return obj.__name__

        # if obj is a python 'type'
        if type(obj).__name__ == type.__name__:
            return obj.__name__

        raise TypeError('Not JSON Serializable: %s' % (obj,))

    def make_unique(name, taken):
        """Returns `name`, suffixed with `_1`, `_2`, ... if already in `taken`.

        `taken` is a set of text names and is updated in place. The check is
        done on text because the stored names are utf8-encoded bytes, which
        never compare equal to `str` on Python 3.
        """
        if name in taken:
            idx = 2
            unique_name = name + '_1'
            while unique_name in taken:
                unique_name = name + '_' + str(idx)
                idx += 1
            name = unique_name
        taken.add(name)
        return name

    from .. import __version__ as keras_version

    f['keras_version'] = str(keras_version).encode('utf8')
    f['backend'] = K.backend().encode('utf8')

    model_config = {}
    model_config['class_name'] = model.__class__.__name__
    model_config['config'] = model.get_config()
    model_config = json.dumps(model_config, default=get_json_type)
    model_config = model_config.encode('utf-8')
    f['model_config'] = model_config

    model_weights_group = f['model_weights']
    model_layers = model.layers
    model_weights_group['layer_names'] = [layer.name.encode('utf8')
                                          for layer in model_layers]
    model_weights_group['backend'] = K.backend().encode('utf8')
    model_weights_group['keras_version'] = str(keras_version).encode('utf8')
    for layer in model_layers:
        layer_group = model_weights_group[layer.name]
        symbolic_weights = layer.weights
        weight_values = K.batch_get_value(symbolic_weights)
        weight_names = []
        taken_names = set()
        for i, w in enumerate(symbolic_weights):
            if hasattr(w, 'name') and w.name:
                name = str(w.name)
            else:
                name = 'param_' + str(i)
            name = make_unique(name, taken_names)
            weight_names.append(name.encode('utf8'))
        layer_group['weight_names'] = weight_names
        for name, val in zip(weight_names, weight_values):
            layer_group[name] = val

    if include_optimizer and model.optimizer:
        if isinstance(model.optimizer, optimizers.TFOptimizer):
            warnings.warn(
                'TensorFlow optimizers do not '
                'make it possible to access '
                'optimizer attributes or optimizer state '
                'after instantiation. '
                'As a result, we cannot save the optimizer '
                'as part of the model save file. '
                'You will have to compile your model again '
                'after loading it. '
                'Prefer using a Keras optimizer instead '
                '(see keras.io/optimizers).')
        else:
            f['training_config'] = json.dumps({
                'optimizer_config': {
                    'class_name': model.optimizer.__class__.__name__,
                    'config': model.optimizer.get_config()
                },
                'loss': model.loss,
                'metrics': model.metrics,
                'sample_weight_mode': model.sample_weight_mode,
                'loss_weights': model.loss_weights,
            }, default=get_json_type).encode('utf8')
            symbolic_weights = model.optimizer.weights
            if symbolic_weights:
                optimizer_weights_group = f['optimizer_weights']
                weight_values = K.batch_get_value(symbolic_weights)
                weight_names = []
                taken_names = set()
                # Default values of symbolic_weights is /variable
                # for Theano and CNTK
                generic_names = K.backend() in ('theano', 'cntk')
                for i, w in enumerate(symbolic_weights):
                    if generic_names:
                        if hasattr(w, 'name'):
                            if w.name.split('/')[-1] == 'variable':
                                name = str(w.name) + '_' + str(i)
                            else:
                                name = str(w.name)
                        else:
                            name = 'param_' + str(i)
                    else:
                        if hasattr(w, 'name') and w.name:
                            name = str(w.name)
                        else:
                            name = 'param_' + str(i)
                    name = make_unique(name, taken_names)
                    weight_names.append(name.encode('utf8'))
                optimizer_weights_group['weight_names'] = weight_names
                for name, val in zip(weight_names, weight_values):
                    optimizer_weights_group[name] = val
When `compile` is set to False, the compilation is omitted without any warning. """ if not custom_objects: custom_objects = {} def convert_custom_objects(obj): """Handles custom object lookup. # Arguments obj: object, dict, or list. # Returns The same structure, where occurrences of a custom object name have been replaced with the custom object. """ if isinstance(obj, list): deserialized = [] for value in obj: deserialized.append(convert_custom_objects(value)) return deserialized if isinstance(obj, dict): deserialized = {} for key, value in obj.items(): deserialized[key] = convert_custom_objects(value) return deserialized if obj in custom_objects: return custom_objects[obj] return obj model_config = f['model_config'] if model_config is None: raise ValueError('No model found in config.') model_config = json.loads(model_config.decode('utf-8')) model = model_from_config(model_config, custom_objects=custom_objects) model_weights_group = f['model_weights'] if 'keras_version' in model_weights_group: original_keras_version = model_weights_group['keras_version'].decode('utf8') else: original_keras_version = '1' if 'backend' in model_weights_group: original_backend = model_weights_group['backend'].decode('utf8') else: original_backend = None layer_names = model_weights_group['layer_names'] layers = model.layers filtered_layers = [] for layer in layers: weights = layer.weights if weights: filtered_layers.append(layer) filtered_layer_names = [] for name in layer_names: layer_weights = model_weights_group[name] weight_names = layer_weights['weight_names'] if weight_names: filtered_layer_names.append(name) layer_names = filtered_layer_names if len(layer_names) != len(filtered_layers): raise ValueError('You are trying to load a weight file' ' containing {} layers into a model with {} layers' .format(len(layer_names), len(filtered_layers)) ) # We batch weight value assignments in a single backend call # which provides a speedup in TensorFlow. 
weight_value_tuples = [] for k, name in enumerate(layer_names): layer_weights = model_weights_group[name] weight_names = layer_weights['weight_names'] weight_values = [layer_weights[weight_name] for weight_name in weight_names] layer = filtered_layers[k] symbolic_weights = layer.weights weight_values = preprocess_weights_for_loading(layer, weight_values, original_keras_version, original_backend, reshape=False) if len(weight_values) != len(symbolic_weights): raise ValueError('Layer #' + str(k) + ' (named "' + layer.name + '" in the current model) was found to ' 'correspond to layer ' + name + ' in the save file. ' 'However the new layer ' + layer.name + ' expects ' + str(len(symbolic_weights)) + ' weights, but the saved weights have ' + str(len(weight_values)) + ' elements.') weight_value_tuples += zip(symbolic_weights, weight_values) K.batch_set_value(weight_value_tuples) if compile: training_config = f.get('training_config') if training_config is None: warnings.warn('No training configuration found in save file: ' 'the model was *not* compiled. ' 'Compile it manually.') return model training_config = json.loads(training_config.decode('utf-8')) optimizer_config = training_config['optimizer_config'] optimizer = optimizers.deserialize(optimizer_config, custom_objects=custom_objects) # Recover loss functions and metrics. loss = convert_custom_objects(training_config['loss']) metrics = convert_custom_objects(training_config['metrics']) sample_weight_mode = training_config['sample_weight_mode'] loss_weights = training_config['loss_weights'] # Compile model. model.compile(optimizer=optimizer, loss=loss, metrics=metrics, loss_weights=loss_weights, sample_weight_mode=sample_weight_mode) # Set optimizer weights. if 'optimizer_weights' in f: # Build train function (to get weight updates). 
def save_model(model, filepath, overwrite=True, include_optimizer=True):
    """Writes a model to HDF5.

    What gets stored:
        - the model's configuration (topology)
        - the model's weights
        - the model's optimizer's state (if any)

    so the model can later be rebuilt in the same state without the code
    that defined or trained it.

    # Arguments
        model: Keras model instance to be saved.
        filepath: either a string path to save to, or an
            `h5py.File` / `h5py.Group` object to save into.
        overwrite: Whether to silently overwrite an existing file at
            `filepath`, or to prompt the user first.
        include_optimizer: If True, save optimizer's state together.

    # Raises
        ImportError: if h5py is not available.
    """
    if h5py is None:
        raise ImportError('`save_model` requires h5py.')

    # Anything that is not already an h5py group is treated as a path;
    # only in that case do we prompt and close the handle afterwards.
    owns_handle = not isinstance(filepath, h5py.Group)
    if owns_handle and not overwrite and os.path.isfile(filepath):
        if not ask_to_proceed_with_overwrite(filepath):
            return

    target = h5dict(filepath, mode='w')
    try:
        _serialize_model(model, target, include_optimizer)
    finally:
        if owns_handle:
            target.close()
def model_from_yaml(yaml_string, custom_objects=None):
    """Parses a yaml model configuration file and returns a model instance.

    # Arguments
        yaml_string: YAML string encoding a model configuration.
        custom_objects: Optional dictionary mapping names
            (strings) to custom classes or functions to be
            considered during deserialization.

    # Returns
        A Keras model instance (uncompiled).
    """
    # SECURITY NOTE(review): YAML loaders that understand python-specific
    # tags must not be fed untrusted input. `safe_load` is not a drop-in
    # replacement if saved configs carry python-specific tags
    # (TODO confirm what `to_yaml` emits), so only load YAML from trusted
    # sources.
    # An explicit Loader is passed because calling `yaml.load` without one
    # is deprecated in PyYAML 5.1+ and an error in PyYAML 6. `FullLoader`
    # is preferred when the installed PyYAML provides it; `Loader` is the
    # historical default.
    loader = getattr(yaml, 'FullLoader', yaml.Loader)
    config = yaml.load(yaml_string, Loader=loader)
    from ..layers import deserialize
    return deserialize(config, custom_objects=custom_objects)
def load_attributes_from_hdf5_group(group, name):
    """Loads attributes of the specified name from the HDF5 group.

    This method deals with an inherent problem
    of HDF5 file which is not able to store
    data larger than HDF5_OBJECT_HEADER_LIMIT bytes.

    # Arguments
        group: A pointer to a HDF5 group.
        name: A name of the attributes to load.

    # Returns
        data: Attributes data, as a list of text strings. Empty if neither
            `name` nor any chunk `name0`, `name1`, ... is present.
    """
    def as_text(values):
        # Attributes may come back as bytes or as already-decoded text;
        # only bytes need decoding.
        return [v.decode('utf8') if isinstance(v, bytes) else v
                for v in values]

    if name in group.attrs:
        return as_text(group.attrs[name])

    # Large attributes are stored split across `name0`, `name1`, ...
    data = []
    chunk_id = 0
    while ('%s%d' % (name, chunk_id)) in group.attrs:
        data.extend(as_text(group.attrs['%s%d' % (name, chunk_id)]))
        chunk_id += 1
    return data
def preprocess_weights_for_loading(layer, weights,
                                   original_keras_version=None,
                                   original_backend=None,
                                   reshape=False):
    """Converts layers weights from Keras 1 format to Keras 2.

    Conversion happens in this order: weights of wrapped/nested layers are
    converted recursively, then Keras 1 layouts are rewritten (only when
    `original_keras_version == '1'`), then convolution kernels are adapted
    to the current backend / target shape, and finally RNN weights are
    passed through `_convert_rnn_weights`.

    # Arguments
        layer: Layer instance.
        weights: List of weights values (Numpy arrays).
        original_keras_version: Keras version for the weights, as a string.
        original_backend: Keras backend the weights were trained with,
            as a string.
        reshape: Reshape weights to fit the layer when the correct number
            of values are present but the shape does not match.

    # Returns
        A list of weights values (Numpy arrays).

    # Raises
        ValueError: if `reshape` is requested for a convolution kernel whose
            number of elements differs from the layer's kernel.
    """
    def convert_nested_bidirectional(weights):
        """Converts layers nested in `Bidirectional` wrapper.

        The first half of `weights` goes to `layer.forward_layer`, the
        second half to `layer.backward_layer`.

        # Arguments
            weights: List of weights values (Numpy arrays).
        # Returns
            A list of weights values (Numpy arrays).
        """
        num_weights_per_layer = len(weights) // 2
        # Nested calls do not forward `reshape`; they use its default.
        forward_weights = preprocess_weights_for_loading(
            layer.forward_layer,
            weights[:num_weights_per_layer],
            original_keras_version,
            original_backend)
        backward_weights = preprocess_weights_for_loading(
            layer.backward_layer,
            weights[num_weights_per_layer:],
            original_keras_version,
            original_backend)
        return forward_weights + backward_weights

    def convert_nested_time_distributed(weights):
        """Converts layers nested in `TimeDistributed` wrapper.

        # Arguments
            weights: List of weights values (Numpy arrays).
        # Returns
            A list of weights values (Numpy arrays).
        """
        return preprocess_weights_for_loading(
            layer.layer, weights, original_keras_version, original_backend)

    def convert_nested_model(weights):
        """Converts layers nested in `Model` or `Sequential`.

        `weights` is consumed front to back: first one slice per sublayer
        for its trainable weights, then one slice per sublayer for its
        non-trainable weights.

        # Arguments
            weights: List of weights values (Numpy arrays).
        # Returns
            A list of weights values (Numpy arrays).
        """
        new_weights = []
        # trainable weights
        for sublayer in layer.layers:
            num_weights = len(sublayer.trainable_weights)
            if num_weights > 0:
                new_weights.extend(preprocess_weights_for_loading(
                    layer=sublayer,
                    weights=weights[:num_weights],
                    original_keras_version=original_keras_version,
                    original_backend=original_backend))
                weights = weights[num_weights:]

        # non-trainable weights
        for sublayer in layer.layers:
            num_weights = len([l for l in sublayer.weights
                               if l not in sublayer.trainable_weights])
            if num_weights > 0:
                new_weights.extend(preprocess_weights_for_loading(
                    layer=sublayer,
                    weights=weights[:num_weights],
                    original_keras_version=original_keras_version,
                    original_backend=original_backend))
                weights = weights[num_weights:]
        return new_weights

    # Convert layers nested in Bidirectional/TimeDistributed/Model/Sequential.
    # These transformations are run both for Keras 1->2 conversion
    # and for conversion of CuDNN layers.
    if layer.__class__.__name__ == 'Bidirectional':
        weights = convert_nested_bidirectional(weights)
    if layer.__class__.__name__ == 'TimeDistributed':
        weights = convert_nested_time_distributed(weights)
    elif layer.__class__.__name__ in ['Model', 'Sequential']:
        weights = convert_nested_model(weights)

    if original_keras_version == '1':
        # NOTE(review): TimeDistributed weights were already passed through
        # the nested conversion above; this runs the wrapped layer's
        # conversion a second time for Keras 1 files - confirm intended.
        if layer.__class__.__name__ == 'TimeDistributed':
            weights = preprocess_weights_for_loading(layer.layer,
                                                     weights,
                                                     original_keras_version,
                                                     original_backend)

        if layer.__class__.__name__ == 'Conv1D':
            shape = weights[0].shape
            # Handle Keras 1.1 format
            if shape[:2] != (layer.kernel_size[0], 1) or shape[3] != layer.filters:
                # Legacy shape:
                # (filters, input_dim, filter_length, 1)
                assert (shape[0] == layer.filters and
                        shape[2:] == (layer.kernel_size[0], 1))
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))
            # Drop the singleton second axis of the 4D kernel.
            weights[0] = weights[0][:, 0, :, :]

        if layer.__class__.__name__ == 'Conv2D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))

        if layer.__class__.__name__ == 'Conv2DTranspose':
            if layer.data_format == 'channels_last':
                # old: (kernel_rows, kernel_cols, stack_size, filters)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (0, 1, 3, 2))
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (2, 3, 0, 1))

        if layer.__class__.__name__ == 'Conv3D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, ...)
                # new: (..., stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0))

        if layer.__class__.__name__ == 'GRU':
            # 9 separate arrays (3 groups of kernel / recurrent kernel /
            # bias) are merged into the 3 concatenated arrays used now.
            if len(weights) == 9:
                kernel = np.concatenate([weights[0],
                                         weights[3],
                                         weights[6]], axis=-1)
                recurrent_kernel = np.concatenate([weights[1],
                                                   weights[4],
                                                   weights[7]], axis=-1)
                bias = np.concatenate([weights[2],
                                       weights[5],
                                       weights[8]], axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'LSTM':
            if len(weights) == 12:
                # old: i, c, f, o
                # new: i, f, c, o
                kernel = np.concatenate([weights[0],
                                         weights[6],
                                         weights[3],
                                         weights[9]], axis=-1)
                recurrent_kernel = np.concatenate([weights[1],
                                                   weights[7],
                                                   weights[4],
                                                   weights[10]], axis=-1)
                bias = np.concatenate([weights[2],
                                       weights[8],
                                       weights[5],
                                       weights[11]], axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'ConvLSTM2D':
            # Same gate reordering as for LSTM above.
            if len(weights) == 12:
                kernel = np.concatenate([weights[0],
                                         weights[6],
                                         weights[3],
                                         weights[9]], axis=-1)
                recurrent_kernel = np.concatenate([weights[1],
                                                   weights[7],
                                                   weights[4],
                                                   weights[10]], axis=-1)
                bias = np.concatenate([weights[2],
                                       weights[8],
                                       weights[5],
                                       weights[11]], axis=-1)
                if layer.data_format == 'channels_first':
                    # old: (filters, stack_size, kernel_rows, kernel_cols)
                    # new: (kernel_rows, kernel_cols, stack_size, filters)
                    kernel = np.transpose(kernel, (2, 3, 1, 0))
                    recurrent_kernel = np.transpose(recurrent_kernel,
                                                    (2, 3, 1, 0))
                weights = [kernel, recurrent_kernel, bias]

    # The remaining steps run regardless of the original Keras version.
    conv_layers = ['Conv1D',
                   'Conv2D',
                   'Conv3D',
                   'Conv2DTranspose',
                   'ConvLSTM2D']
    if layer.__class__.__name__ in conv_layers:
        layer_weights_shape = K.int_shape(layer.weights[0])
        if _need_convert_kernel(original_backend):
            weights[0] = conv_utils.convert_kernel(weights[0])
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = conv_utils.convert_kernel(weights[1])
        if reshape and layer_weights_shape != weights[0].shape:
            # Reshaping is only possible when the element counts agree.
            if weights[0].size != np.prod(layer_weights_shape):
                raise ValueError('Weights must be of equal size to ' +
                                 'apply a reshape operation. ' +
                                 'Layer ' + layer.name +
                                 '\'s weights have shape ' +
                                 str(layer_weights_shape) + ' and size ' +
                                 str(np.prod(layer_weights_shape)) + '. ' +
                                 'The weights for loading have shape ' +
                                 str(weights[0].shape) + ' and size ' +
                                 str(weights[0].size) + '. ')
            weights[0] = np.reshape(weights[0], layer_weights_shape)
        elif layer_weights_shape != weights[0].shape:
            # NOTE(review): the fixed 4-axis permutation presumably targets
            # a kernel saved with a different axis order - confirm which
            # layouts reach this branch.
            weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = np.transpose(weights[1], (3, 2, 0, 1))

    # convert CuDNN layers
    weights = _convert_rnn_weights(layer, weights)

    return weights
Eg.: ``` Keras CuDNN [[0, 1, 2], <---> [[0, 2, 4], [3, 4, 5]] [1, 3, 5]] ``` It can be passed to `transform_kernels()`. # Arguments from_cudnn: `True` if source weights are in CuDNN format, `False` if they're in plain Keras format. # Returns Function that converts input kernel to the other format. """ order = 'F' if from_cudnn else 'C' def transform(kernel): return kernel.T.reshape(kernel.shape, order=order) return transform target_class = layer.__class__.__name__ # convert the weights between CuDNNLSTM and LSTM if target_class in ['LSTM', 'CuDNNLSTM'] and len(weights) == 3: # determine if we're loading a CuDNNLSTM layer # from the number of bias weights: # CuDNNLSTM has (units * 8) weights; while LSTM has (units * 4) # if there's no bias weight in the file, skip this conversion units = weights[1].shape[0] bias_shape = weights[2].shape n_gates = 4 if bias_shape == (2 * units * n_gates,): source = 'CuDNNLSTM' elif bias_shape == (units * n_gates,): source = 'LSTM' else: raise ValueError('Invalid bias shape: ' + str(bias_shape)) def convert_weights(weights, from_cudnn=True): # transpose (and reshape) input and recurrent kernels kernels = transform_kernels(weights[0], transpose_input(from_cudnn), n_gates) recurrent_kernels = transform_kernels(weights[1], lambda k: k.T, n_gates) if from_cudnn: # merge input and recurrent biases into a single set biases = np.sum(np.split(weights[2], 2, axis=0), axis=0) else: # Split single set of biases evenly to two sets. The way of # splitting doesn't matter as long as the two sets sum is kept. biases = np.tile(0.5 * weights[2], 2) return [kernels, recurrent_kernels, biases] if source != target_class: weights = convert_weights(weights, from_cudnn=source == 'CuDNNLSTM') # convert the weights between CuDNNGRU and GRU(reset_after=True) if target_class in ['GRU', 'CuDNNGRU'] and len(weights) == 3: # We can determine the source of the weights from the shape of the bias. 
# If there is no bias we skip the conversion # since CuDNNGRU always has biases. units = weights[1].shape[0] bias_shape = weights[2].shape n_gates = 3 def convert_weights(weights, from_cudnn=True): kernels = transform_kernels(weights[0], transpose_input(from_cudnn), n_gates) recurrent_kernels = transform_kernels(weights[1], lambda k: k.T, n_gates) biases = np.array(weights[2]).reshape((2, -1) if from_cudnn else -1) return [kernels, recurrent_kernels, biases] if bias_shape == (2 * units * n_gates,): source = 'CuDNNGRU' elif bias_shape == (2, units * n_gates): source = 'GRU(reset_after=True)' elif bias_shape == (units * n_gates,): source = 'GRU(reset_after=False)' else: raise ValueError('Invalid bias shape: ' + str(bias_shape)) if target_class == 'CuDNNGRU': target = 'CuDNNGRU' elif layer.reset_after: target = 'GRU(reset_after=True)' else: target = 'GRU(reset_after=False)' # only convert between different types if source != target: types = (source, target) if 'GRU(reset_after=False)' in types: raise ValueError('%s is not compatible with %s' % types) if source == 'CuDNNGRU': weights = convert_weights(weights, from_cudnn=True) elif source == 'GRU(reset_after=True)': weights = convert_weights(weights, from_cudnn=False) return weights def _need_convert_kernel(original_backend): """Checks if conversion on kernel matrices is required during weight loading. The convolution operation is implemented differently in different backends. While TH implements convolution, TF and CNTK implement the correlation operation. So the channel axis needs to be flipped when TF weights are loaded on a TH model, or vice versa. However, there's no conversion required between TF and CNTK. # Arguments original_backend: Keras backend the weights were trained with, as a string. # Returns `True` if conversion on kernel matrices is required, otherwise `False`. 
""" if original_backend is None: # backend information not available return False uses_correlation = {'tensorflow': True, 'theano': False, 'cntk': True} if original_backend not in uses_correlation: # By default, do not convert the kernels if the original backend is unknown return False if K.backend() in uses_correlation: current_uses_correlation = uses_correlation[K.backend()] else: # Assume unknown backends use correlation current_uses_correlation = True return uses_correlation[original_backend] != current_uses_correlation def load_weights_from_hdf5_group(f, layers, reshape=False): """Implements topological (order-based) weight loading. # Arguments f: A pointer to a HDF5 group. layers: a list of target layers. reshape: Reshape weights to fit the layer when the correct number of values are present but the shape does not match. # Raises ValueError: in case of mismatch between provided layers and weights file. """ if 'keras_version' in f.attrs: original_keras_version = f.attrs['keras_version'].decode('utf8') else: original_keras_version = '1' if 'backend' in f.attrs: original_backend = f.attrs['backend'].decode('utf8') else: original_backend = None filtered_layers = [] for layer in layers: weights = layer.weights if weights: filtered_layers.append(layer) layer_names = load_attributes_from_hdf5_group(f, 'layer_names') filtered_layer_names = [] for name in layer_names: g = f[name] weight_names = load_attributes_from_hdf5_group(g, 'weight_names') if weight_names: filtered_layer_names.append(name) layer_names = filtered_layer_names if len(layer_names) != len(filtered_layers): raise ValueError('You are trying to load a weight file ' 'containing ' + str(len(layer_names)) + ' layers into a model with ' + str(len(filtered_layers)) + ' layers.') # We batch weight value assignments in a single backend call # which provides a speedup in TensorFlow. 
def load_weights_from_hdf5_group_by_name(f, layers, skip_mismatch=False,
                                         reshape=False):
    """Implements name-based weight loading.

    (instead of topological weight loading).

    Layers that have no matching name are skipped.

    # Arguments
        f: A pointer to a HDF5 group.
        layers: A list of target layers.
        skip_mismatch: Boolean, whether to skip loading of layers
            where there is a mismatch in the number of weights,
            or a mismatch in the shape of the weights.
        reshape: Reshape weights to fit the layer when the correct number
            of values are present but the shape does not match.

    # Raises
        ValueError: in case of mismatch between provided layers
            and weights file and skip_mismatch=False.
    """
    # Files without a `keras_version` attribute are treated as Keras 1.
    if 'keras_version' in f.attrs:
        original_keras_version = f.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f.attrs:
        original_backend = f.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    # New file format.
    layer_names = load_attributes_from_hdf5_group(f, 'layer_names')

    # Reverse index of layer name to list of layers with name.
    index = {}
    for layer in layers:
        if layer.name:
            index.setdefault(layer.name, []).append(layer)

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = f[name]
        weight_names = load_attributes_from_hdf5_group(g, 'weight_names')
        saved_values = [np.asarray(g[weight_name])
                        for weight_name in weight_names]

        for layer in index.get(name, []):
            symbolic_weights = layer.weights
            # Every layer sharing this name starts from the values as saved:
            # a fresh list is passed so that conversions done for one layer
            # (including in-place list updates) never leak into the next.
            weight_values = preprocess_weights_for_loading(
                layer,
                list(saved_values),
                original_keras_version,
                original_backend,
                reshape=reshape)
            if len(weight_values) != len(symbolic_weights):
                if skip_mismatch:
                    warnings.warn('Skipping loading of weights for '
                                  'layer {}'.format(layer.name) +
                                  ' due to mismatch '
                                  'in number of weights ({} vs {}).'.format(
                                      len(symbolic_weights),
                                      len(weight_values)))
                    continue
                else:
                    raise ValueError('Layer #' + str(k) +
                                     ' (named "' + layer.name +
                                     '") expects ' +
                                     str(len(symbolic_weights)) +
                                     ' weight(s), but the saved weights' +
                                     ' have ' + str(len(weight_values)) +
                                     ' element(s).')
            # Set values. Lengths are known to match at this point.
            for symbolic_weight, weight_value in zip(symbolic_weights,
                                                     weight_values):
                symbolic_shape = K.int_shape(symbolic_weight)
                if symbolic_shape != weight_value.shape:
                    if skip_mismatch:
                        # Report the same shape that was compared (and that
                        # the error below reports).
                        warnings.warn('Skipping loading of weights for '
                                      'layer {}'.format(layer.name) +
                                      ' due to '
                                      'mismatch in shape ({} vs {}).'.format(
                                          symbolic_shape,
                                          weight_value.shape))
                        continue
                    else:
                        raise ValueError('Layer #' + str(k) +
                                         ' (named "' + layer.name +
                                         '"), weight ' +
                                         str(symbolic_weight) +
                                         ' has shape {}'.format(symbolic_shape) +
                                         ', but the saved weight has shape ' +
                                         str(weight_value.shape) + '.')
                else:
                    weight_value_tuples.append((symbolic_weight,
                                                weight_value))

    K.batch_set_value(weight_value_tuples)
def fit_loop(model, f, ins,
             out_labels=None,
             batch_size=None,
             epochs=100,
             verbose=1,
             callbacks=None,
             val_f=None,
             val_ins=None,
             shuffle=True,
             callback_metrics=None,
             initial_epoch=0,
             steps_per_epoch=None,
             validation_steps=None):
    """Abstract fit function for `f(ins)`.

    Assumes that f returns a list, labeled by out_labels.

    # Arguments
        model: Keras model instance.
        f: Keras function returning a list of tensors
        ins: List of tensors to be fed to `f`
        out_labels: List of strings, display names of
            the outputs of `f`
        batch_size: Integer batch size or None if unknown.
        epochs: Number of times to iterate over the data
        verbose: Verbosity mode, 0, 1 or 2
        callbacks: List of callbacks to be called during training
        val_f: Keras function to call for validation
        val_ins: List of tensors to be fed to `val_f`
        shuffle: Whether to shuffle the data at the beginning of each epoch.
            The string `'batch'` selects batch-wise shuffling.
        callback_metrics: List of strings, the display names of the metrics
            passed to the callbacks. They should be the
            concatenation of the list of display names of the outputs of
            `f` and the list of display names
            of the outputs of `val_f`.
        initial_epoch: Epoch at which to start training
            (useful for resuming a previous training run)
        steps_per_epoch: Total number of steps (batches of samples)
            before declaring one epoch finished and starting the
            next epoch. Ignored with the default value of `None`.
        validation_steps: Number of steps to run validation for
            (only if doing validation from data tensors).
            Ignored with the default value of `None`.

    # Returns
        `History` object.

    # Raises
        ValueError: if `validation_steps` is given without
            `steps_per_epoch`, or if validation data is given together
            with `steps_per_epoch` but without `validation_steps`.
        TypeError: if slicing the inputs for a batch raises a `TypeError`.
    """
    do_validation = False
    if val_f and val_ins:
        do_validation = True
        if (verbose and ins and
                hasattr(ins[0], 'shape') and hasattr(val_ins[0], 'shape')):
            print('Train on %d samples, validate on %d samples' %
                  (ins[0].shape[0], val_ins[0].shape[0]))
    if validation_steps:
        do_validation = True
        if steps_per_epoch is None:
            raise ValueError('Can only use `validation_steps` '
                             'when doing step-wise '
                             'training, i.e. `steps_per_epoch` '
                             'must be set.')
    elif do_validation:
        if steps_per_epoch:
            raise ValueError('Must specify `validation_steps` '
                             'to perform validation '
                             'when doing step-wise training.')

    num_train_samples = check_num_samples(ins,
                                          batch_size=batch_size,
                                          steps=steps_per_epoch,
                                          steps_name='steps_per_epoch')
    # The index array is only needed (and only defined) for
    # sample-based training.
    if num_train_samples is not None:
        index_array = np.arange(num_train_samples)

    # Assemble the callback chain: base logger first, optional progress bar,
    # then user callbacks, and finally the history recorder.
    model.history = cbks.History()
    _callbacks = [cbks.BaseLogger(
        stateful_metrics=model.stateful_metric_names)]
    if verbose:
        if steps_per_epoch is not None:
            count_mode = 'steps'
        else:
            count_mode = 'samples'
        _callbacks.append(
            cbks.ProgbarLogger(
                count_mode,
                stateful_metrics=model.stateful_metric_names))
    _callbacks += (callbacks or []) + [model.history]
    callbacks = cbks.CallbackList(_callbacks)
    out_labels = out_labels or []

    # it's possible to callback a different model than itself
    # (used by Sequential models)
    if hasattr(model, 'callback_model') and model.callback_model:
        callback_model = model.callback_model
    else:
        callback_model = model

    callbacks.set_model(callback_model)
    callbacks.set_params({
        'batch_size': batch_size,
        'epochs': epochs,
        'steps': steps_per_epoch,
        'samples': num_train_samples,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics or [],
    })
    callbacks.on_train_begin()
    callback_model.stop_training = False
    for cbk in callbacks:
        cbk.validation_data = val_ins

    # To prevent a slowdown,
    # we find beforehand the arrays that need conversion.
    feed = (model._feed_inputs +
            model._feed_targets +
            model._feed_sample_weights)
    indices_for_conversion_to_dense = []
    for i in range(len(feed)):
        if issparse(ins[i]) and not K.is_sparse(feed[i]):
            indices_for_conversion_to_dense.append(i)

    for epoch in range(initial_epoch, epochs):
        # Reset stateful metrics
        for m in model.stateful_metric_functions:
            m.reset_states()
        callbacks.on_epoch_begin(epoch)
        epoch_logs = {}
        if steps_per_epoch is not None:
            # Step-wise training: `f` is called on the full `ins`
            # once per step, without slicing.
            for step_index in range(steps_per_epoch):
                batch_logs = {}
                batch_logs['batch'] = step_index
                batch_logs['size'] = 1
                callbacks.on_batch_begin(step_index, batch_logs)
                outs = f(ins)

                outs = to_list(outs)
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(step_index, batch_logs)
                if callback_model.stop_training:
                    break

            if do_validation:
                val_outs = test_loop(model, val_f, val_ins,
                                     steps=validation_steps,
                                     verbose=0)
                val_outs = to_list(val_outs)
                # Same labels assumed.
                for l, o in zip(out_labels, val_outs):
                    epoch_logs['val_' + l] = o
        else:
            # Sample-based training: slice `ins` into batches
            # using the (optionally shuffled) index array.
            if shuffle == 'batch':
                index_array = batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            batches = make_batches(num_train_samples, batch_size)
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                try:
                    if isinstance(ins[-1], float):
                        # Do not slice the training phase flag.
                        ins_batch = slice_arrays(
                            ins[:-1], batch_ids) + [ins[-1]]
                    else:
                        ins_batch = slice_arrays(ins, batch_ids)
                except TypeError:
                    raise TypeError('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')
                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(batch_ids)
                callbacks.on_batch_begin(batch_index, batch_logs)
                # Densify only the batch slices flagged before the loop.
                for i in indices_for_conversion_to_dense:
                    ins_batch[i] = ins_batch[i].toarray()

                outs = f(ins_batch)
                outs = to_list(outs)
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)
                if callback_model.stop_training:
                    break

                if batch_index == len(batches) - 1:  # Last batch.
                    if do_validation:
                        val_outs = test_loop(model, val_f, val_ins,
                                             batch_size=batch_size,
                                             verbose=0)
                        val_outs = to_list(val_outs)
                        # Same labels assumed.
                        for l, o in zip(out_labels, val_outs):
                            epoch_logs['val_' + l] = o
        callbacks.on_epoch_end(epoch, epoch_logs)
        if callback_model.stop_training:
            break
    callbacks.on_train_end()
    return model.history
def predict_loop(model, f, ins, batch_size=32, verbose=0, steps=None):
    """Run `f` over the inputs in batches and collect its predictions.

    # Arguments
        model: Keras model instance.
        f: Keras function returning a list of tensors.
        ins: list of tensors to be fed to `f`.
        batch_size: integer batch size.
        verbose: verbosity mode.
        steps: Total number of steps (batches of samples)
            before declaring `predict_loop` finished.
            Ignored with the default value of `None`.

    # Returns
        Array of predictions (if the model has a single output)
        or list of arrays of predictions
        (if the model has multiple outputs).
    """
    num_samples = check_num_samples(ins,
                                    batch_size=batch_size,
                                    steps=steps,
                                    steps_name='steps')
    show_progress = verbose == 1
    if show_progress:
        progbar = Progbar(target=steps if steps is not None else num_samples)

    # Positions of scipy-sparse inputs that feed dense placeholders.
    sparse_to_dense = [
        pos for pos, placeholder in enumerate(model._feed_inputs)
        if issparse(ins[pos]) and not K.is_sparse(placeholder)]

    if steps is not None:
        # Step-based predictions: the number of samples is unknown, so
        # result arrays cannot be pre-allocated. Keep one list of chunks
        # per output and concatenate once all steps have run.
        chunks_per_output = []
        for step in range(steps):
            step_outs = to_list(f(ins))
            if step == 0:
                chunks_per_output.extend([] for _ in step_outs)
            for pos, step_out in enumerate(step_outs):
                chunks_per_output[pos].append(step_out)
            if show_progress:
                progbar.update(step + 1)
        if len(chunks_per_output) == 1:
            return np.concatenate(chunks_per_output[0], axis=0)
        return [np.concatenate(chunks, axis=0)
                for chunks in chunks_per_output]

    # Sample-based predictions.
    results = []
    index_array = np.arange(num_samples)
    for batch_no, (start, stop) in enumerate(
            make_batches(num_samples, batch_size)):
        ids = index_array[start:stop]
        if ins and isinstance(ins[-1], float):
            # Do not slice the training phase flag.
            sliced = slice_arrays(ins[:-1], ids) + [ins[-1]]
        else:
            sliced = slice_arrays(ins, ids)
        for pos in sparse_to_dense:
            sliced[pos] = sliced[pos].toarray()

        step_outs = to_list(f(sliced))
        if batch_no == 0:
            # Pre-allocate the results arrays.
            for step_out in step_outs:
                full_shape = (num_samples,) + step_out.shape[1:]
                results.append(np.zeros(full_shape, dtype=step_out.dtype))
        for pos, step_out in enumerate(step_outs):
            results[pos][start:stop] = step_out
        if show_progress:
            progbar.update(stop)
    return unpack_singleton(results)
def test_loop(model, f, ins, batch_size=None, verbose=0, steps=None):
    """Evaluate `f` over the inputs in batches and average the results.

    # Arguments
        model: Keras model instance.
        f: Keras function returning a list of tensors.
        ins: list of tensors to be fed to `f`.
        batch_size: integer batch size or `None`.
        verbose: verbosity mode.
        steps: Total number of steps (batches of samples)
            before declaring predictions finished.
            Ignored with the default value of `None`.

    # Returns
        Scalar loss (if the model has a single output and no metrics)
        or list of scalars (if the model has multiple outputs
        and/or metrics). The attribute `model.metrics_names` will give you
        the display labels for the scalar outputs.
    """
    stateful_metric_indices = []
    if hasattr(model, 'metrics'):
        for metric in model.stateful_metric_functions:
            metric.reset_states()
        stateful_metric_indices = [
            pos for pos, label in enumerate(model.metrics_names)
            if str(label) in model.stateful_metric_names]

    num_samples = check_num_samples(ins,
                                    batch_size=batch_size,
                                    steps=steps,
                                    steps_name='steps')
    totals = []
    show_progress = verbose == 1
    if show_progress:
        progbar = Progbar(target=num_samples if steps is None else steps)

    # To prevent a slowdown,
    # we find beforehand the arrays that need conversion.
    placeholders = (model._feed_inputs +
                    model._feed_targets +
                    model._feed_sample_weights)
    sparse_to_dense = [
        pos for pos in range(len(placeholders))
        if issparse(ins[pos]) and not K.is_sparse(placeholders[pos])]

    if steps is not None:
        for step in range(steps):
            step_outs = f(ins)
            if not isinstance(step_outs, list):
                if step == 0:
                    totals.append(0.)
                totals[0] += step_outs
            else:
                if step == 0:
                    totals.extend(0. for _ in step_outs)
                for pos, value in enumerate(step_outs):
                    if pos in stateful_metric_indices:
                        # Stateful metrics keep their latest value
                        # (cast to float in this branch only).
                        totals[pos] = float(value)
                    else:
                        totals[pos] += value
            if show_progress:
                progbar.update(step + 1)
        divisor = steps
    else:
        index_array = np.arange(num_samples)
        for batch_no, (start, stop) in enumerate(
                make_batches(num_samples, batch_size)):
            ids = index_array[start:stop]
            if isinstance(ins[-1], float):
                # Do not slice the training phase flag.
                sliced = slice_arrays(ins[:-1], ids) + [ins[-1]]
            else:
                sliced = slice_arrays(ins, ids)
            for pos in sparse_to_dense:
                sliced[pos] = sliced[pos].toarray()

            step_outs = f(sliced)
            if not isinstance(step_outs, list):
                if batch_no == 0:
                    totals.append(0.)
                totals[0] += step_outs * len(ids)
            else:
                if batch_no == 0:
                    totals.extend(0. for _ in step_outs)
                for pos, value in enumerate(step_outs):
                    if pos in stateful_metric_indices:
                        totals[pos] = value
                    else:
                        # Weight by batch length so the final division
                        # yields a per-sample average.
                        totals[pos] += value * len(ids)

            if show_progress:
                progbar.update(stop)
        divisor = num_samples

    # Stateful metrics already hold their final value; average the rest.
    for pos in range(len(totals)):
        if pos not in stateful_metric_indices:
            totals[pos] /= divisor
    return unpack_singleton(totals)
class InputLayer(Layer):
    """Layer to be used as an entry point into a model.

    It can either wrap an existing tensor (pass an `input_tensor` argument)
    or create a placeholder tensor (pass arguments `input_shape`
    or `batch_input_shape` as well as `dtype`).

    # Arguments
        input_shape: Shape tuple, not including the batch axis.
        batch_size: Optional input batch size (integer or None).
        batch_input_shape: Shape tuple, including the batch axis.
        dtype: Datatype of the input.
        input_tensor: Optional tensor to use as layer input
            instead of creating a placeholder.
        sparse: Boolean, whether the placeholder created
            is meant to be sparse.
        name: Name of the layer (string).

    # Raises
        ValueError: if both `input_shape` and `batch_input_shape` are given,
            or if no shape is given and none can be inferred
            from `input_tensor`.
    """

    @interfaces.legacy_input_support
    def __init__(self, input_shape=None, batch_size=None,
                 batch_input_shape=None,
                 dtype=None, input_tensor=None, sparse=False, name=None):
        # Auto-generate a unique name of the form `input_<uid>`.
        if not name:
            prefix = 'input'
            name = prefix + '_' + str(K.get_uid(prefix))
        super(InputLayer, self).__init__(dtype=dtype, name=name)

        self.trainable = False
        self.built = True
        self.sparse = sparse
        self.supports_masking = True

        if input_shape and batch_input_shape:
            raise ValueError('Only provide the input_shape OR '
                             'batch_input_shape argument to '
                             'InputLayer, not both at the same time.')
        if input_tensor is not None and batch_input_shape is None:
            # If input_tensor is set, and batch_input_shape is not set:
            # Attempt automatic input shape inference.
            try:
                batch_input_shape = K.int_shape(input_tensor)
            except TypeError:
                if not input_shape and not batch_input_shape:
                    raise ValueError('InputLayer was provided '
                                     'an input_tensor argument, '
                                     'but its input shape cannot be '
                                     'automatically inferred. '
                                     'You should pass an input_shape or '
                                     'batch_input_shape argument.')
        if not batch_input_shape:
            if not input_shape:
                raise ValueError('An Input layer should be passed either '
                                 'a `batch_input_shape` or an `input_shape`.')
            else:
                # Prepend the batch axis to the per-sample shape.
                batch_input_shape = (batch_size,) + tuple(input_shape)
        else:
            batch_input_shape = tuple(batch_input_shape)

        # Fall back to the backend default float type, or to the dtype
        # of the wrapped tensor when one was given.
        if not dtype:
            if input_tensor is None:
                dtype = K.floatx()
            else:
                dtype = K.dtype(input_tensor)

        self.batch_input_shape = batch_input_shape
        self.dtype = dtype

        if input_tensor is None:
            self.is_placeholder = True
            input_tensor = K.placeholder(shape=batch_input_shape,
                                         dtype=dtype,
                                         sparse=self.sparse,
                                         name=self.name)
        else:
            self.is_placeholder = False
            input_tensor._keras_shape = batch_input_shape
        # Create an input node to add to self.outbound_node
        # and set output_tensors' _keras_history.
        input_tensor._uses_learning_phase = False
        input_tensor._keras_history = (self, 0, 0)
        # The node uses the same tensor as both its input and its output.
        Node(self,
             inbound_layers=[],
             node_indices=[],
             tensor_indices=[],
             input_tensors=[input_tensor],
             output_tensors=[input_tensor],
             input_masks=[None],
             output_masks=[None],
             input_shapes=[batch_input_shape],
             output_shapes=[batch_input_shape])

    def get_config(self):
        """Returns the layer configuration as a dict.

        The dict holds `batch_input_shape`, `dtype`, `sparse` and `name`.
        """
        config = {'batch_input_shape': self.batch_input_shape,
                  'dtype': self.dtype,
                  'sparse': self.sparse,
                  'name': self.name}
        return config
def Input(shape=None, batch_shape=None,
          name=None, dtype=None, sparse=False,
          tensor=None):
    """`Input()` is used to instantiate a Keras tensor.

    A Keras tensor is a tensor object from the underlying backend
    (Theano, TensorFlow or CNTK), augmented with attributes that
    allow a Keras model to be built just from its inputs and outputs.

    For instance, if a, b and c are Keras tensors,
    it becomes possible to do:
    `model = Model(input=[a, b], output=c)`

    The added Keras attributes are:
        `_keras_shape`: Integer shape tuple propagated
            via Keras-side shape inference.
        `_keras_history`: Last layer applied to the tensor.
            the entire layer graph is retrievable from that layer,
            recursively.

    # Arguments
        shape: A shape tuple (integer), not including the batch size.
            For instance, `shape=(32,)` indicates that the expected input
            will be batches of 32-dimensional vectors.
        batch_shape: A shape tuple (integer), including the batch size.
            For instance, `batch_shape=(10, 32)` indicates that
            the expected input will be batches of 10 32-dimensional vectors.
            `batch_shape=(None, 32)` indicates batches of an arbitrary number
            of 32-dimensional vectors.
        name: An optional name string for the layer.
            Should be unique in a model (do not reuse the same name twice).
            It will be autogenerated if it isn't provided.
        dtype: The data type expected by the input, as a string
            (`float32`, `float64`, `int32`...)
        sparse: A boolean specifying whether the placeholder
            to be created is sparse.
        tensor: Optional existing tensor to wrap into the `Input` layer.
            If set, the layer will not create a placeholder tensor.

    # Returns
        A tensor.

    # Example

    ```python
    # this is a logistic regression in Keras
    x = Input(shape=(32,))
    y = Dense(16, activation='softmax')(x)
    model = Model(x, y)
    ```
    """
    if not batch_shape:
        if tensor is None:
            assert shape is not None, ('Please provide to Input either a `shape`'
                                       ' or a `batch_shape` argument. Note that '
                                       '`shape` does not include the batch '
                                       'dimension.')
        if shape is not None:
            # Leave the batch dimension unspecified.
            batch_shape = (None,) + tuple(shape)

    layer = InputLayer(batch_input_shape=batch_shape,
                       name=name,
                       dtype=dtype or K.floatx(),
                       sparse=sparse,
                       input_tensor=tensor)
    # Return tensor including _keras_shape and _keras_history.
    # Note that in this case train_output and test_output are the same pointer.
    return unpack_singleton(layer._inbound_nodes[0].output_tensors)
class Sequential(Model):
    """Linear stack of layers.

    # Arguments
        layers: list of layers to add to the model.
        name: optional name, forwarded to the `Model` constructor.

    # Example

    ```python
    # Optionally, the first layer can receive an `input_shape` argument:
    model = Sequential()
    model.add(Dense(32, input_shape=(500,)))

    # Afterwards, we do automatic shape inference:
    model.add(Dense(32))

    # This is identical to the following:
    model = Sequential()
    model.add(Dense(32, input_dim=500))

    # And to the following:
    model = Sequential()
    model.add(Dense(32, batch_input_shape=(None, 500)))

    # Note that you can also omit the `input_shape` argument:
    # In that case the model gets built the first time you call `fit` (or other
    # training and evaluation methods).
    model = Sequential()
    model.add(Dense(32))
    model.add(Dense(32))
    model.compile(optimizer=optimizer, loss=loss)

    # This builds the model for the first time:
    model.fit(x, y, batch_size=32, epochs=10)

    # Note that when using this delayed-build pattern
    # (no input shape specified),
    # the model doesn't have any weights until the first call
    # to a training/evaluation method (since it isn't yet built):
    model = Sequential()
    model.add(Dense(32))
    model.add(Dense(32))
    model.weights  # returns []

    # Whereas if you specify the input shape, the model gets built continuously
    # as you are adding layers:
    model = Sequential()
    model.add(Dense(32, input_shape=(500,)))
    model.add(Dense(32))
    model.weights  # returns list of length 4

    # When using the delayed-build pattern (no input shape specified), you can
    # choose to manually build your model by calling
    # `build(batch_input_shape)`:
    model = Sequential()
    model.add(Dense(32))
    model.add(Dense(32))
    model.build((None, 500))
    model.weights  # returns list of length 4
    ```
    """

    def __init__(self, layers=None, name=None):
        super(Sequential, self).__init__(name=name)
        # Shape passed to an explicit `build(input_shape)` call, if any;
        # `get_config` serializes it when set.
        self._build_input_shape = None

        # Add to the model any layers passed to the constructor.
        if layers:
            for layer in layers:
                self.add(layer)

    @property
    def layers(self):
        """Layers added via `add`, without a leading `InputLayer`."""
        # Historically, `sequential.layers` only returns layers that were added
        # via `add`, and omits the auto-generated `InputLayer`
        # that comes at the bottom of the stack.
        if self._layers and isinstance(self._layers[0], InputLayer):
            return self._layers[1:]
        return self._layers

    @property
    def model(self):
        """Deprecated alias that returns this instance after a warning."""
        # Historically, `Sequential` was once
        # implemented as a wrapper for `Model` which maintained
        # its underlying `Model` as the `model` property.
        # We keep it for compatibility reasons.
        warnings.warn('`Sequential.model` is deprecated. '
                      '`Sequential` is a subclass of `Model`, you can '
                      'just use your `Sequential` instance directly.')
        return self

    def add(self, layer):
        """Adds a layer instance on top of the layer stack.

        # Arguments
            layer: layer instance.

        # Raises
            TypeError: If `layer` is not a layer instance.
            ValueError: In case the `layer` argument does not
                know its input shape.
            ValueError: In case the `layer` argument has
                multiple output tensors, or is already connected
                somewhere else (forbidden in `Sequential` models).
        """
        if not isinstance(layer, Layer):
            raise TypeError('The added layer must be '
                            'an instance of class Layer. '
                            'Found: ' + str(layer))
        self.built = False
        if not self._layers:
            set_inputs = False
            # First layer in model: check that it is an input layer.
            if not isinstance(layer, InputLayer):
                # Create an input tensor and call `layer` on the input tensor.
                # First, we need to infer the expected input shape and dtype.
                first_layer = layer
                if isinstance(layer, (Model, Sequential)):
                    # We were passed a model as first layer.
                    # This requires a specific way to figure out the
                    # input shape and dtype.
                    if not layer.layers:
                        raise ValueError('Cannot add an empty model '
                                         'to a `Sequential` model.')
                    # In case of nested models: recover the first layer
                    # of the deepest model to infer input shape and dtype.
                    first_layer = layer.layers[0]
                    while isinstance(first_layer, (Model, Sequential)):
                        first_layer = first_layer.layers[0]

                if hasattr(first_layer, 'batch_input_shape'):
                    batch_shape = first_layer.batch_input_shape
                    dtype = first_layer.dtype
                    # Instantiate the input layer.
                    x = Input(
                        batch_shape=batch_shape,
                        dtype=dtype,
                        name=layer.name + '_input')
                    # This will build the current layer
                    # and create the node connecting the current layer
                    # to the input layer we just created.
                    layer(x)
                    set_inputs = True
            else:
                # Corner case where the user passes an InputLayer via `add`.
                assert len(layer._inbound_nodes[-1].output_tensors) == 1
                set_inputs = True

            if set_inputs:
                if len(layer._inbound_nodes[-1].output_tensors) != 1:
                    raise ValueError('All layers in a Sequential model '
                                     'should have a single output tensor. '
                                     'For multi-output layers, '
                                     'use the functional API.')
                self.outputs = [layer._inbound_nodes[-1].output_tensors[0]]
                self.inputs = network.get_source_inputs(self.outputs[0])
        elif self.outputs:
            # The model already has outputs: chain the new layer onto them.
            output_tensor = layer(self.outputs[0])
            if isinstance(output_tensor, list):
                raise TypeError('All layers in a Sequential model '
                                'should have a single output tensor. '
                                'For multi-output layers, '
                                'use the functional API.')
            self.outputs = [output_tensor]
        # With known inputs the graph is rebuilt; otherwise the layer is
        # only recorded, to be connected by a later `build(input_shape)`.
        if self.inputs:
            self.build()
        else:
            self._layers.append(layer)

    def pop(self):
        """Removes the last layer in the model.

        # Raises
            TypeError: if there are no layers in the model.
        """
        if not self.layers:
            raise TypeError('There are no layers in the model.')

        self._layers.pop()
        self.built = False
        if not self.layers:
            self.outputs = None
            self.inputs = None
        elif self.outputs:
            # Detach the new last layer from the removed one and rebuild.
            self.layers[-1]._outbound_nodes = []
            self.outputs = [self.layers[-1].output]
            self.build()

    def build(self, input_shape=None):
        """Connects the stacked layers and initializes the graph network.

        # Arguments
            input_shape: Optional shape tuple including the batch axis.
                Only used when the model has no inputs yet.
        """
        if input_shape and not self.inputs:
            batch_shape = tuple(input_shape)
            dtype = K.floatx()
            x = Input(batch_shape=batch_shape,
                      dtype=dtype,
                      name=self.name + '_input')
            self.inputs = [x]
            for layer in self._layers:
                x = layer(x)
            self.outputs = [x]
            self._build_input_shape = input_shape

        if self.inputs:
            self._init_graph_network(self.inputs,
                                     self.outputs,
                                     name=self.name)
            self.built = True

    def predict_proba(self, x, batch_size=32, verbose=0):
        """Generates class probability predictions for the input samples.

        The input samples are processed batch by batch.

        # Arguments
            x: input data, as a Numpy array or list of Numpy arrays
                (if the model has multiple inputs).
            batch_size: integer.
            verbose: verbosity mode, 0 or 1.

        # Returns
            A Numpy array of probability predictions.
        """
        preds = self.predict(x, batch_size, verbose)
        # Values outside [0, 1] only trigger a warning;
        # the predictions are returned unchanged.
        if preds.min() < 0. or preds.max() > 1.:
            warnings.warn('Network returning invalid probability values. '
                          'The last layer might not normalize predictions '
                          'into probabilities '
                          '(like softmax or sigmoid would).')
        return preds

    def predict_classes(self, x, batch_size=32, verbose=0):
        """Generate class predictions for the input samples.

        The input samples are processed batch by batch.

        # Arguments
            x: input data, as a Numpy array or list of Numpy arrays
                (if the model has multiple inputs).
            batch_size: integer.
            verbose: verbosity mode, 0 or 1.

        # Returns
            A numpy array of class predictions.
        """
        proba = self.predict(x, batch_size=batch_size, verbose=verbose)
        # More than one value on the last axis: pick the arg-max class.
        # Otherwise threshold the single value at 0.5.
        if proba.shape[-1] > 1:
            return proba.argmax(axis=-1)
        else:
            return (proba > 0.5).astype('int32')

    def get_config(self):
        """Returns the model configuration as a dict.

        The dict holds `name`, `layers` (one `class_name`/`config` entry
        per layer) and, when set, `build_input_shape`.
        """
        layer_configs = []
        for layer in self.layers:
            layer_configs.append({
                'class_name': layer.__class__.__name__,
                'config': layer.get_config()
            })
        config = {
            'name': self.name,
            'layers': copy.deepcopy(layer_configs)
        }
        if self._build_input_shape:
            config['build_input_shape'] = self._build_input_shape
        return config

    @classmethod
    def from_config(cls, config, custom_objects=None):
        """Instantiates a model from a config produced by `get_config`.

        # Arguments
            config: Dict with `name` and `layers` keys, or (legacy format)
                a bare list of layer configs.
            custom_objects: Passed through to the layer deserializer.

        # Returns
            A new instance of `cls`.
        """
        if 'name' in config:
            name = config['name']
            build_input_shape = config.get('build_input_shape')
            layer_configs = config['layers']
        else:  # legacy config file
            name = build_input_shape = None
            layer_configs = config
        model = cls(name=name)
        for conf in layer_configs:
            layer = layer_module.deserialize(conf,
                                             custom_objects=custom_objects)
            model.add(layer)
        if not model.inputs and build_input_shape:
            model.build(build_input_shape)
        return model
def standardize_single_array(x):
    """Standardizes a single user-provided array or tensor.

    # Arguments
        x: `None`, a backend tensor, or a Numpy array.

    # Returns
        `None` if `x` is `None`; `x` itself if it is a backend tensor;
        otherwise `x`, expanded to 2 dimensions if it was 1-dimensional.

    # Raises
        ValueError: if `x` is a backend tensor whose shape or
            batch dimension is unknown.
    """
    if x is None:
        return None
    elif K.is_tensor(x):
        shape = K.int_shape(x)
        if shape is None or shape[0] is None:
            raise ValueError(
                'When feeding symbolic tensors to a model, we expect the '
                'tensors to have a static batch size. '
                'Got tensor with shape: %s' % str(shape))
        return x
    elif x.ndim == 1:
        x = np.expand_dims(x, 1)
    return x


def standardize_input_data(data,
                           names,
                           shapes=None,
                           check_batch_axis=True,
                           exception_prefix=''):
    """Normalizes inputs and targets provided by users.

    Users may pass data as a list of arrays, dictionary of arrays,
    or as a single array. We normalize this to an ordered list of
    arrays (same order as `names`), while checking that the provided
    arrays have shapes that match the network's expectations.

    # Arguments
        data: User-provided input data (polymorphic).
        names: List of expected array names.
        shapes: Optional list of expected array shapes.
        check_batch_axis: Boolean; whether to check that
            the batch axis of the arrays matches the expected
            value found in `shapes`.
        exception_prefix: String prefix used for exception formatting.

    # Returns
        List of standardized input arrays (one array per model input).

    # Raises
        ValueError: in case of improperly formatted user-provided data.
        TypeError: if a single non-array object is passed where one
            Numpy array is expected.
    """
    if not names:
        if data is not None and hasattr(data, '__len__') and len(data):
            # Build one message string. Passing `data` as a second
            # exception argument makes `str(e)` render as a tuple.
            raise ValueError('Error when checking model ' +
                             exception_prefix + ': '
                             'expected no data, but got: ' +
                             str(data)[:200])
        return []
    if data is None:
        return [None for _ in range(len(names))]

    if isinstance(data, dict):
        try:
            data = [
                data[x].values
                if data[x].__class__.__name__ == 'DataFrame' else data[x]
                for x in names
            ]
        except KeyError as e:
            raise ValueError('No data provided for "' + e.args[0] +
                             '". Need data '
                             'for each key in: ' + str(names))
    elif isinstance(data, list):
        if isinstance(data[0], list):
            data = [np.asarray(d) for d in data]
        elif len(names) == 1 and isinstance(data[0], (float, int)):
            # A flat list of numbers for a single input is one array.
            data = [np.asarray(data)]
        else:
            data = [
                x.values if x.__class__.__name__ == 'DataFrame'
                else x for x in data
            ]
    else:
        # The DataFrame check is by class name, so pandas is not imported.
        data = data.values if data.__class__.__name__ == 'DataFrame' else data
        data = [data]
    data = [standardize_single_array(x) for x in data]

    if len(data) != len(names):
        if data and hasattr(data[0], 'shape'):
            raise ValueError(
                'Error when checking model ' + exception_prefix +
                ': the list of Numpy arrays that you are passing to '
                'your model is not the size the model expected. '
                'Expected to see ' + str(len(names)) + ' array(s), '
                'but instead got the following list of ' +
                str(len(data)) + ' arrays: ' + str(data)[:200] + '...')
        elif len(names) > 1:
            raise ValueError(
                'Error when checking model ' + exception_prefix +
                ': you are passing a list as input to your model, '
                'but the model expects a list of ' + str(len(names)) +
                ' Numpy arrays instead. '
                'The list you passed was: ' + str(data)[:200])
        elif len(data) == 1 and not hasattr(data[0], 'shape'):
            raise TypeError('Error when checking model ' + exception_prefix +
                            ': data should be a Numpy array, or list/dict of '
                            'Numpy arrays. Found: ' + str(data)[:200] + '...')
        elif len(names) == 1:
            data = [np.asarray(data)]

    # Check shapes compatibility.
    if shapes:
        for i in range(len(names)):
            # Backend tensors are not shape-checked here.
            if shapes[i] is not None and not K.is_tensor(data[i]):
                data_shape = data[i].shape
                shape = shapes[i]
                if data[i].ndim != len(shape):
                    raise ValueError(
                        'Error when checking ' + exception_prefix +
                        ': expected ' + names[i] + ' to have ' +
                        str(len(shape)) + ' dimensions, but got array '
                        'with shape ' + str(data_shape))
                if not check_batch_axis:
                    data_shape = data_shape[1:]
                    shape = shape[1:]
                for dim, ref_dim in zip(data_shape, shape):
                    # A falsy reference dimension (e.g. `None`)
                    # matches any size.
                    if ref_dim != dim and ref_dim:
                        raise ValueError(
                            'Error when checking ' + exception_prefix +
                            ': expected ' + names[i] + ' to have shape ' +
                            str(shape) + ' but got array with shape ' +
                            str(data_shape))
    return data
def standardize_sample_or_class_weights(x_weight,
                                        output_names,
                                        weight_type):
    """Maps `sample_weight` or `class_weight` to model outputs.

    # Arguments
        x_weight: User-provided `sample_weight` or `class_weight` argument.
        output_names: List of output names (strings) in the model.
        weight_type: A string used purely for exception printing.

    # Returns
        A list of `sample_weight` or `class_weight` where there are exactly
            one element per model output.

    # Raises
        ValueError: In case of invalid user-provided argument.
        TypeError: If the model has several outputs and `x_weight`
            is neither a list nor a dict.
    """
    if x_weight is None or len(x_weight) == 0:
        return [None for _ in output_names]
    if len(output_names) == 1:
        if isinstance(x_weight, list) and len(x_weight) == 1:
            return x_weight
        if isinstance(x_weight, dict) and output_names[0] in x_weight:
            return [x_weight[output_names[0]]]
        else:
            # Anything else is taken as the weight of the single output
            # (including a dict not keyed by the output name).
            return [x_weight]
    if isinstance(x_weight, list):
        if len(x_weight) != len(output_names):
            raise ValueError('Provided `' + weight_type + '` was a list of ' +
                             str(len(x_weight)) +
                             ' elements, but the model has ' +
                             str(len(output_names)) + ' outputs. '
                             'You should provide one `' + weight_type + '` '
                             'array per model output.')
        return x_weight
    if isinstance(x_weight, dict):
        # Outputs missing from the dict get `None`.
        return [x_weight.get(name) for name in output_names]
    else:
        raise TypeError('The model has multiple outputs, so `' +
                        weight_type + '` '
                        'should be either a list or a dict. '
                        'Provided `' + weight_type +
                        '` type not understood: ' +
                        str(x_weight))


def standardize_class_weights(class_weight, output_names):
    """Maps a user-provided `class_weight` argument to model outputs.

    See `standardize_sample_or_class_weights` for the accepted formats.
    """
    return standardize_sample_or_class_weights(class_weight,
                                               output_names,
                                               'class_weight')


def standardize_sample_weights(sample_weight, output_names):
    """Maps a user-provided `sample_weight` argument to model outputs.

    See `standardize_sample_or_class_weights` for the accepted formats.
    """
    return standardize_sample_or_class_weights(sample_weight,
                                               output_names,
                                               'sample_weight')
Got array shapes: ' + str([x.shape for x in inputs])) if len(set_y) > 1: raise ValueError('All target arrays (y) should have ' 'the same number of samples. Got array shapes: ' + str([y.shape for y in targets])) if set_x and set_y and list(set_x)[0] != list(set_y)[0]: raise ValueError('Input arrays should have ' 'the same number of samples as target arrays. ' 'Found ' + str(list(set_x)[0]) + ' input samples ' 'and ' + str(list(set_y)[0]) + ' target samples.') if len(set_w) > 1: raise ValueError('All sample_weight arrays should have ' 'the same number of samples. Got array shapes: ' + str([w.shape for w in weights])) if set_y and set_w and list(set_y)[0] != list(set_w)[0]: raise ValueError('Sample_weight arrays should have ' 'the same number of samples as target arrays. Got ' + str(list(set_y)[0]) + ' input samples and ' + str(list(set_w)[0]) + ' target samples.') def check_loss_and_target_compatibility(targets, loss_fns, output_shapes): """Does validation on the compatibility of targets and loss functions. This helps prevent users from using loss functions incorrectly. # Arguments targets: list of Numpy arrays of targets. loss_fns: list of loss functions. output_shapes: list of shapes of model outputs. # Raises ValueError: if a loss function or target array is incompatible with an output. """ key_losses = {losses.mean_squared_error, losses.binary_crossentropy, losses.categorical_crossentropy} for y, loss, shape in zip(targets, loss_fns, output_shapes): if y is None or loss is None: continue if loss is losses.categorical_crossentropy: if y.shape[-1] == 1: raise ValueError( 'You are passing a target array of shape ' + str(y.shape) + ' while using as loss `categorical_crossentropy`. ' '`categorical_crossentropy` expects ' 'targets to be binary matrices (1s and 0s) ' 'of shape (samples, classes). 
def collect_metrics(metrics, output_names):
    """Maps metric functions to model outputs.

    # Arguments
        metrics: a list or dict of metric functions.
        output_names: a list of the names (strings) of model outputs.

    # Returns
        A list (one entry per model output) of lists of metric functions.
        A list of metrics is copied once per output; a dict is looked up
        by output name, with outputs missing from the dict getting no
        metrics. For instance, with 2 outputs where the first one should
        report "binary_accuracy" and "binary_crossentropy" and the second
        one only "binary_accuracy", the result is:
        `[[binary_accuracy, binary_crossentropy], [binary_accuracy]]`

    # Raises
        TypeError: if an incorrect type is passed for the `metrics` argument.
    """
    # Nothing requested: every output gets its own empty list.
    if not metrics:
        return [[] for _ in output_names]

    # A flat list applies to all outputs; each output gets a shallow copy.
    if isinstance(metrics, list):
        return [copy.copy(metrics) for _ in output_names]

    if not isinstance(metrics, dict):
        raise TypeError('Type of `metrics` argument not understood. '
                        'Expected a list or dictionary, found: ' +
                        str(metrics))

    # Per-output dict: normalize every entry to a list.
    return [to_list(metrics.get(output_name, []))
            for output_name in output_names]
def make_batches(size, batch_size):
    """Returns a list of batch indices (tuples of indices).

    Each tuple is a `(start, stop)` pair; the last pair is clipped to
    `size` when `size` is not a multiple of `batch_size`.

    # Arguments
        size: Integer, total size of the data to slice into batches.
        batch_size: Integer, batch size.

    # Returns
        A list of tuples of array indices.
    """
    # Ceiling division: a trailing partial batch still counts as a batch.
    num_batches = (size + batch_size - 1) // batch_size
    batches = []
    for batch_index in range(num_batches):
        start = batch_index * batch_size
        stop = min(size, start + batch_size)
        batches.append((start, stop))
    return batches
def standardize_weights(y,
                        sample_weight=None,
                        class_weight=None,
                        sample_weight_mode=None):
    """Performs sample weight validation and standardization.

    Everything gets normalized to a single sample-wise (or timestep-wise)
    weight array. `sample_weight` takes precedence over `class_weight`;
    when neither is given an array of ones is returned.

    # Arguments
        y: Numpy array of model targets to be weighted.
        sample_weight: User-provided `sample_weight` argument.
        class_weight: User-provided `class_weight` argument.
        sample_weight_mode: One of `None` or `"temporal"`.
            `"temporal"` indicated that we expect 2D weight data
            that will be applied to the last 2 dimensions of
            the targets (i.e. we are weighting timesteps, not samples).

    # Returns
        A numpy array of target weights, one entry per sample to weight.

    # Raises
        ValueError: In case of invalid user-provided arguments.
    """
    if sample_weight_mode is not None:
        if sample_weight_mode != 'temporal':
            raise ValueError('`sample_weight_mode` '
                             'should be None or "temporal". '
                             'Found: ' + str(sample_weight_mode))
        if len(y.shape) < 3:
            raise ValueError('Found a sample_weight array for '
                             'an input with shape ' +
                             str(y.shape) + '. '
                             'Timestep-wise sample weighting (use of '
                             'sample_weight_mode="temporal") is restricted to '
                             'outputs that are at least 3D, i.e. that have '
                             'a time dimension.')
        if sample_weight is not None and len(sample_weight.shape) != 2:
            raise ValueError('Found a sample_weight array with shape ' +
                             str(sample_weight.shape) + '. '
                             'In order to use timestep-wise sample weighting, '
                             'you should pass a 2D sample_weight array.')
    else:
        if sample_weight is not None and len(sample_weight.shape) != 1:
            raise ValueError('Found a sample_weight array with shape ' +
                             str(sample_weight.shape) + '. '
                             'In order to use timestep-wise sample weights, '
                             'you should specify '
                             'sample_weight_mode="temporal" '
                             'in compile(). If you just mean to use '
                             'sample-wise weights, make sure your '
                             'sample_weight array is 1D.')

    if sample_weight is not None and class_weight is not None:
        warnings.warn('Found both `sample_weight` and `class_weight`: '
                      '`class_weight` argument will be ignored.')

    if sample_weight is not None:
        if len(sample_weight.shape) > len(y.shape):
            raise ValueError('Found a sample_weight with shape ' +
                             str(sample_weight.shape) + '. '
                             'Expected sample_weight with rank '
                             'less than or equal to ' + str(len(y.shape)))

        # The weights must line up with the leading axes of the targets.
        if y.shape[:sample_weight.ndim] != sample_weight.shape:
            raise ValueError('Found a sample_weight array with shape ' +
                             str(sample_weight.shape) +
                             ' for an input with shape ' +
                             str(y.shape) + '. '
                             'sample_weight cannot be broadcast.')
        return sample_weight
    elif isinstance(class_weight, dict):
        if len(y.shape) > 2:
            raise ValueError('`class_weight` not supported for '
                             '3+ dimensional targets.')
        # Only look at the second axis when there is one: indexing
        # `y.shape[1]` on 1D targets raises IndexError, which made the
        # "targets are already class indices" fallback unreachable.
        is_2d = len(y.shape) == 2
        if is_2d and y.shape[1] > 1:
            # One-hot / probability rows: the class is the arg-max column.
            y_classes = np.argmax(y, axis=1)
        elif is_2d and y.shape[1] == 1:
            # Column vector of class indices: flatten it.
            y_classes = np.reshape(y, y.shape[0])
        else:
            y_classes = y

        weights = np.asarray([class_weight[cls] for cls in y_classes
                              if cls in class_weight])

        if len(weights) != len(y_classes):
            # subtract the sets to pick all missing classes
            existing_classes = set(y_classes)
            existing_class_weight = set(class_weight.keys())
            raise ValueError('`class_weight` must contain '
                             'all classes in the data.'
                             ' The classes %s exist in the data but not in '
                             '`class_weight`.'
                             % (existing_classes - existing_class_weight))
        return weights
    else:
        # No weighting requested: uniform weights per sample / timestep.
        if sample_weight_mode is None:
            return np.ones((y.shape[0],), dtype=K.floatx())
        else:
            return np.ones((y.shape[0], y.shape[1]), dtype=K.floatx())
""" while True: for item in seq: yield item Keras-2.2.4/keras/engine/network.py0000644000000000116100000016527513354530144016760 0ustar rooteng00000000000000"""A `Network` is way to compose layers: the topological form of a `Model`. """ from __future__ import print_function from __future__ import absolute_import from __future__ import division import numpy as np import json import yaml import warnings import copy import os from six.moves import zip from . import saving from .base_layer import Layer from .base_layer import Node from .input_layer import InputLayer from .. import backend as K from ..utils.io_utils import ask_to_proceed_with_overwrite from ..utils.layer_utils import print_summary as print_layer_summary from ..utils.layer_utils import get_source_inputs from ..utils.generic_utils import has_arg from ..utils.generic_utils import to_list from ..utils.generic_utils import object_list_uid from ..utils.generic_utils import unpack_singleton from ..legacy import interfaces try: import h5py except ImportError: h5py = None class Network(Layer): """A Network is a directed acyclic graph of layers. It is the topological form of a "model". A Model is simply a Network with added training routines. # Properties name inputs outputs layers input_spec (list of class instances) each entry describes one required input: - ndim - dtype trainable (boolean) input_shape output_shape weights (list of variables) trainable_weights (list of variables) non_trainable_weights (list of variables) losses updates state_updates stateful # Methods __call__ summary get_layer get_weights set_weights get_config compute_output_shape save add_loss add_update get_losses_for get_updates_for to_json to_yaml reset_states # Class Methods from_config # Raises TypeError: if input tensors are not Keras tensors (tensors returned by `Input`). 
""" @interfaces.legacy_model_constructor_support def __init__(self, *args, **kwargs): # Signature detection if (len(args) == 2 or len(args) == 1 and 'outputs' in kwargs or 'inputs' in kwargs and 'outputs' in kwargs): # Graph network self._init_graph_network(*args, **kwargs) else: # Subclassed network self._init_subclassed_network(**kwargs) def _base_init(self, name=None): # The following are implemented as property functions: # self.trainable_weights # self.non_trainable_weights # self.input_spec # self.losses # self.updates # Handle `name` argument. if not name: prefix = self.__class__.__name__.lower() name = prefix + '_' + str(K.get_uid(prefix)) self.name = name # This acts just like the `trainable` attribute of any layer instance. # It does not affect users of the underlying layers, only users of the # Network instance. self.trainable = True self._is_compiled = False self._expects_training_arg = False self._initial_weights = None self.supports_masking = False if not hasattr(self, 'optimizer'): # Don't reset optimizer if already set. self.optimizer = None # Private attributes to implement compatibility with Layer. self._updates = [] self._losses = [] self._per_input_losses = {} self._per_input_updates = {} # All layers in order of horizontal graph traversal. # Entries are unique. Includes input and output layers. self._layers = [] # Used only in conjunction with graph-networks self._outbound_nodes = [] self._inbound_nodes = [] def _init_graph_network(self, inputs, outputs, name=None): self._uses_inputs_arg = True # Normalize and set self.inputs, self.outputs. self.inputs = to_list(inputs, allow_tuple=True) self.outputs = to_list(outputs, allow_tuple=True) # User-provided argument validation. # Check for redundancy in inputs. if len(set(self.inputs)) != len(self.inputs): raise ValueError('The list of inputs passed to the model ' 'is redundant. ' 'All inputs should only appear once.' 
' Found: ' + str(self.inputs)) for x in self.inputs: # Check that x has appropriate `_keras_history` metadata. if not hasattr(x, '_keras_history'): cls_name = self.__class__.__name__ raise ValueError('Input tensors to a ' + cls_name + ' ' + 'must come from `keras.layers.Input`. ' 'Received: ' + str(x) + ' (missing previous layer metadata).') # Check that x is an input tensor. layer, node_index, tensor_index = x._keras_history if (len(layer._inbound_nodes) > 1 or (layer._inbound_nodes and layer._inbound_nodes[0].inbound_layers)): cls_name = self.__class__.__name__ warnings.warn(cls_name + ' inputs must come from ' '`keras.layers.Input` ' '(thus holding past layer metadata), ' 'they cannot be the output of ' 'a previous non-Input layer. ' 'Here, a tensor specified as ' 'input to your model ' 'was not an Input tensor, ' 'it was generated by layer ' + layer.name + '.\n' 'Note that input tensors are ' 'instantiated via ' '`tensor = keras.layers.Input(shape)`.\n' 'The tensor that caused the issue was: ' + str(x.name)) for x in self.outputs: if not hasattr(x, '_keras_history'): cls_name = self.__class__.__name__ raise ValueError('Output tensors to a ' + cls_name + ' must be ' 'the output of a Keras `Layer` ' '(thus holding past layer metadata). ' 'Found: ' + str(x)) self._base_init(name=name) self._compute_previous_mask = ( has_arg(self.call, 'mask') or hasattr(self, 'compute_mask')) # A Network does not create weights of its own, # thus it is already built. self.built = True self._is_graph_network = True self._input_layers = [] self._output_layers = [] self._input_coordinates = [] self._output_coordinates = [] # This is for performance optimization when calling the Network on new # inputs. Every time the Network is called on a set on input tensors, # we compute the output tensors, # output masks and output shapes in one pass, # then cache them here. When any of these outputs is queried later, we # retrieve it from there instead of recomputing it. 
self._output_mask_cache = {} self._output_tensor_cache = {} self._output_shape_cache = {} # Build self._output_layers: for x in self.outputs: layer, node_index, tensor_index = x._keras_history self._output_layers.append(layer) self._output_coordinates.append((layer, node_index, tensor_index)) # Build self._input_layers: for x in self.inputs: layer, node_index, tensor_index = x._keras_history # It's supposed to be an input layer, so only one node # and one tensor output. assert node_index == 0 assert tensor_index == 0 self._input_layers.append(layer) self._input_coordinates.append((layer, node_index, tensor_index)) # Keep track of the network's nodes and layers. nodes, nodes_by_depth, layers, layers_by_depth = _map_graph_network( self.inputs, self.outputs) self._network_nodes = nodes self._nodes_by_depth = nodes_by_depth self._layers = layers self._layers_by_depth = layers_by_depth # Create the node linking internal inputs to internal outputs. Node(outbound_layer=self, inbound_layers=[], node_indices=[], tensor_indices=[], input_tensors=self.inputs, output_tensors=self.outputs, # No network-level masking for now. input_masks=[None for _ in self.inputs], output_masks=[None for _ in self.outputs], input_shapes=[x._keras_shape for x in self.inputs], output_shapes=[x._keras_shape for x in self.outputs]) # Fill in the output mask cache. masks = [] for x in self.inputs: layer, node_index, tensor_index = x._keras_history node = layer._inbound_nodes[node_index] mask = node.output_masks[tensor_index] masks.append(mask) mask_cache_key = object_list_uid(inputs) mask_cache_key += '_' + object_list_uid(masks) masks = [] for x in self.outputs: layer, node_index, tensor_index = x._keras_history node = layer._inbound_nodes[node_index] mask = node.output_masks[tensor_index] masks.append(mask) mask = unpack_singleton(masks) self._output_mask_cache[mask_cache_key] = mask # Build self.input_names and self.output_names. 
self.input_names = [] self.output_names = [] self._feed_input_names = [] self._feed_inputs = [] self._feed_input_shapes = [] for i, layer in enumerate(self._input_layers): # Check that layer is an InputLayer. if not isinstance(layer, InputLayer): raise TypeError( 'Input layers to a `Model` must be `InputLayer` objects. ' 'Received inputs: {}. ' 'Input {} (0-based) originates ' 'from layer type `{}`.'.format(inputs, i, layer.__class__.__name__)) self.input_names.append(layer.name) if layer.is_placeholder: self._feed_inputs.append(layer.input) self._feed_input_names.append(layer.name) self._feed_input_shapes.append(self.inputs[i]._keras_shape) for layer in self._output_layers: self.output_names.append(layer.name) def _init_subclassed_network(self, name=None): self._base_init(name=name) self._is_graph_network = False self._expects_training_arg = has_arg(self.call, 'training') self._uses_inputs_arg = has_arg(self.call, 'inputs') self.outputs = None self.inputs = None self.built = False def __setattr__(self, name, value): # Automatically track layers set as Model # attributes for subclassed Models. if isinstance(value, (Layer, Network)): try: is_graph_network = self._is_graph_network except AttributeError: raise RuntimeError( 'It looks like you are subclassing `Model` and you ' 'forgot to call `super(YourClass, self).__init__()`.' ' Always start with this line.') if not is_graph_network: if value not in self._layers: self._layers.append(value) super(Network, self).__setattr__(name, value) @property def layers(self): return self._layers def get_layer(self, name=None, index=None): """Retrieves a layer based on either its name (unique) or index. If `name` and `index` are both provided, `index` will take precedence. Indices are based on order of horizontal graph traversal (bottom-up). # Arguments name: String, name of layer. index: Integer, index of layer. # Returns A layer instance. # Raises ValueError: In case of invalid layer name or index. 
""" # It would be unreliable to build a dictionary # based on layer names, because names can potentially # be changed at any point by the user # without the network being notified of it. if index is not None: if len(self.layers) <= index: raise ValueError('Was asked to retrieve layer at index ' + str(index) + ' but model only has ' + str(len(self.layers)) + ' layers.') else: return self.layers[index] else: if not name: raise ValueError('Provide either a layer name or layer index.') for layer in self.layers: if layer.name == name: return layer raise ValueError('No such layer: ' + name) @property def updates(self): """Retrieves the model's updates. Will only include updates that are either unconditional, or conditional on inputs to this model (e.g. will not include updates that depend on tensors that aren't inputs to this model). # Returns A list of update ops. """ if not self.trainable and not self.stateful: return [] updates = [] for layer in self.layers: if hasattr(layer, 'updates'): if self._is_graph_network: # Collect updates that are dependent on inputs # that are part of the model. for node_index, node in enumerate(layer._inbound_nodes): node_key = self._node_key(layer, node_index) if node_key in self._network_nodes: # The model owns this layer node. inputs = node.input_tensors updates += layer.get_updates_for(inputs) # Collect unconditional updates. updates += layer.get_updates_for(None) else: updates += layer.updates return updates @property def losses(self): """Retrieves the model's losses. Will only include losses that are either unconditional, or conditional on inputs to this model (e.g. will not include losses that depend on tensors that aren't inputs to this model). # Returns A list of loss tensors. """ losses = [] for layer in self.layers: if hasattr(layer, 'losses'): if self._is_graph_network: # Collect losses that are dependent on inputs # that are part of the model. 
for node_index, node in enumerate(layer._inbound_nodes): node_key = self._node_key(layer, node_index) if node_key in self._network_nodes: # The model owns this layer node. inputs = node.input_tensors losses += layer.get_losses_for(inputs) # Collect unconditional losses. losses += layer.get_losses_for(None) else: losses += layer.losses # Add any potential unconditional model-level loss. losses += self.get_losses_for(None) unique_tensors = list( set(x for x in losses if not isinstance(x, (float, int)))) non_tensors = [x for x in losses if isinstance(x, (float, int))] return unique_tensors + non_tensors @property def uses_learning_phase(self): if not self.outputs: return False return any([x._uses_learning_phase for x in self.outputs]) @property def stateful(self): return any([(hasattr(layer, 'stateful') and layer.stateful) for layer in self.layers]) def reset_states(self): for layer in self.layers: if hasattr(layer, 'reset_states') and getattr(layer, 'stateful', False): layer.reset_states() @property def state_updates(self): """Returns the `updates` from all layers that are stateful. This is useful for separating training updates and state updates, e.g. when we need to update a layer's internal state during prediction. # Returns A list of update ops. """ state_updates = [] for layer in self.layers: if layer.stateful: state_updates += layer.updates return state_updates @property def trainable_weights(self): if not self.trainable: return [] weights = [] for layer in self.layers: weights += layer.trainable_weights return weights @property def non_trainable_weights(self): weights = [] for layer in self.layers: weights += layer.non_trainable_weights if not self.trainable: trainable_weights = [] for layer in self.layers: trainable_weights += layer.trainable_weights return trainable_weights + weights return weights def get_weights(self): """Retrieves the weights of the model. # Returns A flat list of Numpy arrays. 
""" weights = [] for layer in self.layers: weights += layer.weights return K.batch_get_value(weights) def set_weights(self, weights): """Sets the weights of the model. # Arguments weights: A list of Numpy arrays with shapes and types matching the output of `model.get_weights()`. """ tuples = [] for layer in self.layers: num_param = len(layer.weights) layer_weights = weights[:num_param] for sw, w in zip(layer.weights, layer_weights): tuples.append((sw, w)) weights = weights[num_param:] K.batch_set_value(tuples) @property def input_spec(self): """Gets the model's input specs. # Returns A list of `InputSpec` instances (one per input to the model) or a single instance if the model has only one input. """ if not self._is_graph_network: # TODO: support it in subclassed networks after inputs are set. return None specs = [] for layer in getattr(self, '_input_layers', []): if layer.input_spec is None: specs.append(None) else: if not isinstance(layer.input_spec, list): raise TypeError('Layer ' + layer.name + ' has an input_spec attribute that ' 'is not a list. We expect a list. ' 'Found input_spec = ' + str(layer.input_spec)) specs += layer.input_spec return unpack_singleton(specs) def call(self, inputs, mask=None): """Calls the model on new inputs. In this case `call` just reapplies all ops in the graph to the new inputs (e.g. build a new computational graph from the provided inputs). A model is callable on non-Keras tensors. # Arguments inputs: A tensor or list of tensors. mask: A mask or list of masks. A mask can be either a tensor or None (no mask). # Returns A tensor if there is a single output, or a list of tensors if there are more than one outputs. 
""" inputs = to_list(inputs) if mask is None: masks = [None for _ in range(len(inputs))] else: masks = to_list(mask) cache_key = object_list_uid(inputs) cache_key += '_' + object_list_uid(masks) if cache_key in self._output_tensor_cache: return self._output_tensor_cache[cache_key] else: output_tensors, _, _ = self.run_internal_graph(inputs, masks) return output_tensors def compute_mask(self, inputs, mask): if not self._is_graph_network: return None inputs = to_list(inputs) if mask is None: masks = [None for _ in range(len(inputs))] else: masks = to_list(mask) cache_key = object_list_uid(inputs) cache_key += '_' + object_list_uid(masks) if cache_key in self._output_mask_cache: return self._output_mask_cache[cache_key] else: _, output_masks, _ = self.run_internal_graph(inputs, masks) return output_masks def compute_output_shape(self, input_shape): if not self._is_graph_network: # Must be implemented by subclasses. raise NotImplementedError input_shapes = to_list(input_shape) if len(input_shapes) != len(self._input_layers): raise ValueError('Invalid input_shape argument ' + str(input_shape) + ': model has ' + str(len(self._input_layers)) + ' tensor inputs.') cache_key = ', '.join([str(x) for x in input_shapes]) if cache_key in self._output_shape_cache: output_shapes = self._output_shape_cache[cache_key] if isinstance(output_shapes, list): return unpack_singleton(output_shapes) return output_shapes else: # Bad luck, we have to run the graph manually. layers_to_output_shapes = {} for i in range(len(input_shapes)): layer = self._input_layers[i] input_shape = input_shapes[i] # It's an input layer: compute_output_shape is identity, # and there is only one node and one tensor output. shape_key = layer.name + '_0_0' layers_to_output_shapes[shape_key] = input_shape depth_keys = list(self._nodes_by_depth.keys()) depth_keys.sort(reverse=True) # Iterate over nodes, by depth level. 
    def run_internal_graph(self, inputs, masks=None):
        """Computes output tensors for new inputs.

        Walks the nodes of the network from the highest depth key to the
        lowest, calling each node's layer on the tensors already computed
        for that node's reference inputs, and records the results so that
        later nodes can consume them.

        Besides returning its results, this method:
            - registers the updates and losses of every called layer on
              the network (`self.add_update` / `self.add_loss`),
            - adds activity regularization losses to the called layers,
            - sets `_keras_shape` and `_uses_learning_phase` on the
              computed output tensors when all inputs of the node carry
              `_keras_shape`,
            - stores the results in the output tensor, mask and shape
              caches of the network.

        # Note:
            - Expects `inputs` to be a list (potentially with 1 element).
            - Can be run on non-Keras tensors.

        # Arguments
            inputs: List of tensors
            masks: List of masks (tensors or None).

        # Returns
            Three values: output_tensors, output_masks, output_shapes.
            Each one has been passed through `unpack_singleton`;
            `output_shapes` is `None` when some output tensor has no
            `_keras_shape`.

        # Raises
            Exception: if a layer returns a different number of output
                masks than output tensors.
            AssertionError: if one of the network outputs could not be
                computed from the provided inputs.
        """
        if masks is None:
            masks = [None for _ in range(len(inputs))]

        # Dictionary mapping reference tensors to tuples
        # (computed tensor, compute mask)
        # we assume a 1:1 mapping from tensor to mask
        # TODO: raise exception when a `.compute_mask()` call
        # does not return a list the same size as `call`
        tensor_map = {}
        # Seed the map: each reference input of the network stands for
        # the corresponding new input (keys are `str(id(tensor))`).
        for x, y, mask in zip(self.inputs, inputs, masks):
            tensor_map[str(id(x))] = (y, mask)

        # Process depth levels in decreasing key order.
        depth_keys = list(self._nodes_by_depth.keys())
        depth_keys.sort(reverse=True)
        for depth in depth_keys:
            nodes = self._nodes_by_depth[depth]
            for node in nodes:
                # This is always a single layer, never a list.
                layer = node.outbound_layer
                reference_input_tensors = node.input_tensors
                reference_output_tensors = node.output_tensors

                # If all previous input tensors are available in tensor_map,
                # then call node.inbound_layer on them.
                computed_data = []  # List of tuples (input, mask).
                for x in reference_input_tensors:
                    if str(id(x)) in tensor_map:
                        computed_data.append(tensor_map[str(id(x))])

                # Nodes with at least one missing input are skipped.
                if len(computed_data) == len(reference_input_tensors):
                    # call layer
                    with K.name_scope(layer.name):
                        # Reuse the call arguments recorded on the node.
                        # NOTE(review): `node.arguments` is used as-is, so
                        # the 'mask' entry added below is written into
                        # that same dict - confirm this is intended.
                        if node.arguments:
                            kwargs = node.arguments
                        else:
                            kwargs = {}
                        if len(computed_data) == 1:
                            # Single input: pass the tensor and its mask
                            # directly rather than as one-element lists.
                            computed_tensor, computed_mask = computed_data[0]
                            if has_arg(layer.call, 'mask'):
                                if 'mask' not in kwargs:
                                    kwargs['mask'] = computed_mask
                            output_tensors = to_list(
                                layer.call(computed_tensor, **kwargs))
                            output_masks = layer.compute_mask(computed_tensor,
                                                              computed_mask)
                            if output_masks is None:
                                output_masks = [None for _ in output_tensors]
                            else:
                                output_masks = to_list(output_masks)
                            computed_tensors = [computed_tensor]

                            # computed_masks might be used in the future.
                            computed_masks = [computed_mask]
                        else:
                            # Several inputs: pass lists of tensors/masks.
                            computed_tensors = [x[0] for x in computed_data]
                            computed_masks = [x[1] for x in computed_data]
                            if has_arg(layer.call, 'mask'):
                                if 'mask' not in kwargs:
                                    kwargs['mask'] = computed_masks
                            output_tensors = to_list(
                                layer.call(computed_tensors, **kwargs))
                            output_masks = layer.compute_mask(computed_tensors,
                                                              computed_masks)
                            if output_masks is None:
                                output_masks = [None for _ in output_tensors]
                            else:
                                output_masks = to_list(output_masks)
                        # Apply activity regularizer if any:
                        if (hasattr(layer, 'activity_regularizer') and
                                layer.activity_regularizer is not None):
                            with K.name_scope('activity_regularizer'):
                                regularization_losses = [
                                    layer.activity_regularizer(x)
                                    for x in output_tensors]
                            layer.add_loss(regularization_losses,
                                           inputs=computed_tensors)

                        if len(output_masks) != len(output_tensors):
                            raise Exception(
                                'Layers should have equal number of output tensors '
                                'and output masks. Layer ' + str(layer.name) + ' has'
                                ' ' + str(len(output_tensors)) + ' output tensors '
                                'and ' + str(len(output_masks)) + ' output masks.')
                    # Update model updates and losses:
                    # Keep track of updates that depend on the inputs
                    # (e.g. BN updates).
                    self.add_update(layer.get_updates_for(computed_tensors), inputs)
                    # Keep track of unconditional updates (e.g. a counter).
                    self.add_update(layer.get_updates_for(None), None)
                    # Keep track of losses that depend on the inputs
                    # (e.g. activity regularizers).
                    self.add_loss(layer.get_losses_for(computed_tensors), inputs)
                    # Keep track of unconditional losses
                    # (e.g. weight regularizers).
                    self.add_loss(layer.get_losses_for(None), None)

                    # Update _keras_shape.
                    if all([hasattr(x, '_keras_shape') for x in computed_tensors]):
                        input_shapes = unpack_singleton(
                            [x._keras_shape for x in computed_tensors])
                        shapes = to_list(layer.compute_output_shape(input_shapes))
                        uses_learning_phase = any(
                            [x._uses_learning_phase for x in computed_tensors])

                        # An output uses the learning phase if it already
                        # did, or if any input of the node does.
                        for x, s in zip(output_tensors, shapes):
                            x._keras_shape = s
                            _u = getattr(x, '_uses_learning_phase', False)
                            x._uses_learning_phase = _u or uses_learning_phase

                    # Update tensor_map.
                    for x, y, mask in zip(reference_output_tensors,
                                          output_tensors,
                                          output_masks):
                        tensor_map[str(id(x))] = (y, mask)

        # Collect the tensors computed for the network's own outputs.
        output_tensors = []
        output_masks = []
        output_shapes = []
        for x in self.outputs:
            assert str(id(x)) in tensor_map, 'Could not compute output ' + str(x)
            tensor, mask = tensor_map[str(id(x))]
            # Once one output lacks `_keras_shape`, `output_shapes` becomes
            # None and stays None for the remaining outputs.
            if hasattr(tensor, '_keras_shape') and output_shapes is not None:
                shape = tensor._keras_shape
                output_shapes.append(shape)
            else:
                output_shapes = None
            output_tensors.append(tensor)
            output_masks.append(mask)

        # Update cache;
        # keys are based on ids on input tensors and inputs masks.
        cache_key = object_list_uid(inputs)
        cache_key += '_' + object_list_uid(masks)

        output_tensors = unpack_singleton(output_tensors)
        self._output_tensor_cache[cache_key] = output_tensors

        output_masks = unpack_singleton(output_masks)
        self._output_mask_cache[cache_key] = output_masks

        if output_shapes is not None:
            # The shape cache is keyed on the input shapes instead of on
            # tensor ids.
            # NOTE(review): this reads `_keras_shape` on every new input,
            # which presumably all inputs carry whenever the outputs do -
            # confirm for non-Keras input tensors.
            input_shapes = [x._keras_shape for x in inputs]
            cache_key = ', '.join([str(x) for x in input_shapes])

            output_shapes = unpack_singleton(output_shapes)
            self._output_shape_cache[cache_key] = output_shapes
        return output_tensors, output_masks, output_shapes
node_conversion_map = {} for layer in self.layers: if issubclass(layer.__class__, Network): # Networks start with a pre-existing node # linking their input to output. kept_nodes = 1 else: kept_nodes = 0 for original_node_index, node in enumerate(layer._inbound_nodes): node_key = self._node_key(layer, original_node_index) if node_key in self._network_nodes: # i.e. we mark it to be saved node_conversion_map[node_key] = kept_nodes kept_nodes += 1 # serialize and save the layers in layer_configs layer_configs = [] for layer in self.layers: # From the earliest layers on. layer_class_name = layer.__class__.__name__ layer_config = layer.get_config() filtered_inbound_nodes = [] for original_node_index, node in enumerate(layer._inbound_nodes): node_key = self._node_key(layer, original_node_index) if node_key in self._network_nodes: # The node is relevant to the model: # add to filtered_inbound_nodes. if node.arguments: try: json.dumps(node.arguments) kwargs = node.arguments except TypeError: warnings.warn( 'Layer ' + layer.name + ' was passed non-serializable ' 'keyword arguments: ' + str(node.arguments) + '. They will not be included ' 'in the serialized model ' '(and thus will be missing ' 'at deserialization time).') kwargs = {} else: kwargs = {} if node.inbound_layers: node_data = [] for i in range(len(node.inbound_layers)): inbound_layer = node.inbound_layers[i] node_index = node.node_indices[i] tensor_index = node.tensor_indices[i] new_node_index = node_conversion_map.get( self._node_key(inbound_layer, node_index), 0) node_data.append([inbound_layer.name, new_node_index, tensor_index, kwargs]) filtered_inbound_nodes.append(node_data) layer_configs.append({ 'name': layer.name, 'class_name': layer_class_name, 'config': layer_config, 'inbound_nodes': filtered_inbound_nodes, }) config['layers'] = layer_configs # Gather info about inputs and outputs. 
@classmethod
def from_config(cls, config, custom_objects=None):
    """Instantiates a Model from its config (output of `get_config()`).

    Reconstruction happens in two passes: every layer listed in
    `config['layers']` is deserialized first, then the recorded layer
    calls (nodes) are replayed until none is left pending.

    # Arguments
        config: Model config dictionary. The keys read here are
            `'layers'`, `'input_layers'`, `'output_layers'`
            and (optionally) `'name'`.
        custom_objects: Optional dictionary mapping names
            (strings) to custom classes or functions to be
            considered during deserialization.

    # Returns
        A model instance.

    # Raises
        ValueError: In case of improperly formatted config dict.
    """
    # Layer instances created during
    # the graph reconstruction process
    created_layers = {}

    # Dictionary mapping layer instances to
    # node data that specifies a layer call.
    # It acts as a queue that maintains any unprocessed
    # layer call until it becomes possible to process it
    # (i.e. until the input tensors to the call all exist).
    unprocessed_nodes = {}

    def add_unprocessed_node(layer, node_data):
        # Append `node_data` to the pending calls of `layer`.
        if layer not in unprocessed_nodes:
            unprocessed_nodes[layer] = [node_data]
        else:
            unprocessed_nodes[layer].append(node_data)

    def process_node(layer, node_data):
        # Replay one recorded call of `layer`. If any input tensor
        # does not exist yet, the whole call is put back in the queue.
        input_tensors = []
        for input_data in node_data:
            inbound_layer_name = input_data[0]
            inbound_node_index = input_data[1]
            inbound_tensor_index = input_data[2]
            # Entries are either [name, node_index, tensor_index]
            # or [name, node_index, tensor_index, kwargs].
            # The kwargs of the last entry are the ones used for the call.
            if len(input_data) == 3:
                kwargs = {}
            elif len(input_data) == 4:
                kwargs = input_data[3]
            else:
                raise ValueError('Improperly formatted model config.')
            if inbound_layer_name not in created_layers:
                add_unprocessed_node(layer, node_data)
                return
            inbound_layer = created_layers[inbound_layer_name]
            # The inbound layer exists but the node we need
            # has not been created yet: retry later.
            if len(inbound_layer._inbound_nodes) <= inbound_node_index:
                add_unprocessed_node(layer, node_data)
                return
            inbound_node = inbound_layer._inbound_nodes[inbound_node_index]
            input_tensors.append(
                inbound_node.output_tensors[inbound_tensor_index])
        # Call layer on its inputs, thus creating the node
        # and building the layer if needed.
        if input_tensors:
            layer(unpack_singleton(input_tensors), **kwargs)

    def process_layer(layer_data):
        """Deserializes a layer and enqueues its inbound nodes.

        The layer calls themselves are deferred to `process_node`.

        # Arguments
            layer_data: layer config dict.

        # Raises
            ValueError: In case of improperly formatted `layer_data` dict.
        """
        layer_name = layer_data['name']

        # Instantiate layer.
        from ..layers import deserialize as deserialize_layer

        layer = deserialize_layer(layer_data,
                                  custom_objects=custom_objects)
        created_layers[layer_name] = layer

        # Gather layer inputs.
        inbound_nodes_data = layer_data['inbound_nodes']
        for node_data in inbound_nodes_data:
            # We don't process nodes (i.e. make layer calls)
            # on the fly because the inbound node may not yet exist,
            # in case of layer shared at different topological depths
            # (e.g. a model such as A(B(A(B(x)))))
            add_unprocessed_node(layer, node_data)

    # First, we create all layers and enqueue nodes to be processed
    for layer_data in config['layers']:
        process_layer(layer_data)
    # Then we process nodes in order of layer depth.
    # Nodes that cannot yet be processed (if the inbound node
    # does not yet exist) are re-enqueued, and the process
    # is repeated until all nodes are processed.
    # NOTE(review): a node that refers to a layer or node index which
    # never gets created is re-enqueued on every pass, so this loop
    # does not terminate for such a config.
    while unprocessed_nodes:
        for layer_data in config['layers']:
            layer = created_layers[layer_data['name']]
            if layer in unprocessed_nodes:
                # `pop` empties the queue of this layer first, so nodes
                # re-enqueued by `process_node` wait for the next pass.
                for node_data in unprocessed_nodes.pop(layer):
                    process_node(layer, node_data)

    name = config.get('name')
    input_tensors = []
    output_tensors = []
    # Resolve the [layer_name, node_index, tensor_index] coordinates
    # of the model inputs and outputs into actual tensors.
    for layer_data in config['input_layers']:
        layer_name, node_index, tensor_index = layer_data
        assert layer_name in created_layers
        layer = created_layers[layer_name]
        layer_output_tensors = layer._inbound_nodes[node_index].output_tensors
        input_tensors.append(layer_output_tensors[tensor_index])
    for layer_data in config['output_layers']:
        layer_name, node_index, tensor_index = layer_data
        assert layer_name in created_layers
        layer = created_layers[layer_name]
        layer_output_tensors = layer._inbound_nodes[node_index].output_tensors
        output_tensors.append(layer_output_tensors[tensor_index])
    return cls(inputs=input_tensors, outputs=output_tensors, name=name)
def save_weights(self, filepath, overwrite=True):
    """Writes the weights of every layer to a HDF5 file.

    Layout of the weight file:
        - `layer_names` (attribute), a list of strings
            (ordered names of model layers).
        - For every layer, a `group` named `layer.name`
            - For every such layer group, a group attribute `weight_names`,
                a list of strings
                (ordered names of weights tensor of the layer).
            - For every weight in the layer, a dataset
                storing the weight value, named after the weight tensor.

    # Arguments
        filepath: String, path to the file to save the weights to.
        overwrite: Whether to silently overwrite any existing file at the
            target location, or provide the user with a manual prompt.

    # Raises
        ImportError: If h5py is not available.
    """
    if h5py is None:
        raise ImportError('`save_weights` requires h5py.')

    # Only ask the user when an existing file would be replaced
    # and silent overwriting was not requested.
    needs_confirmation = (not overwrite) and os.path.isfile(filepath)
    if needs_confirmation and not ask_to_proceed_with_overwrite(filepath):
        return

    with h5py.File(filepath, 'w') as hdf5_file:
        saving.save_weights_to_hdf5_group(hdf5_file, self.layers)
        hdf5_file.flush()
def to_json(self, **kwargs):
    """Serializes the network configuration to a JSON string.

    To load a network from a JSON save file, use
    `keras.models.model_from_json(json_string, custom_objects={})`.

    # Arguments
        **kwargs: Additional keyword arguments
            to be passed to `json.dumps()`.

    # Returns
        A JSON string.
    """
    def _encode_fallback(value):
        # Called by `json.dumps` for objects it cannot encode natively.
        value_type = type(value)

        # Numpy objects: arrays become nested lists,
        # everything else is converted with `.item()`.
        if value_type.__module__ == np.__name__:
            if isinstance(value, np.ndarray):
                return value.tolist()
            return value.item()

        # Python classes are stored by name.
        if value_type.__name__ == type.__name__:
            return value.__name__

        raise TypeError('Not JSON Serializable:', value)

    return json.dumps(self._updated_config(),
                      default=_encode_fallback,
                      **kwargs)
def _map_graph_network(inputs, outputs):
    """Validates a network's topology and gather its layers and nodes.

    # Arguments
        inputs: List of input tensors.
        outputs: List of outputs tensors.

    # Returns
        A tuple `(network_nodes, nodes_by_depth, layers, layers_by_depth)`.
        - network_nodes: set of node keys (strings built by
            `_make_node_key`) of the nodes that belong to the graph.
        - nodes_by_depth: dict mapping ints (depth)
            to lists of node instances.
        - layers: list of Layer instances, ordered by decreasing depth
            and, within one depth, by traversal order.
        - layers_by_depth: dict mapping ints (depth)
            to lists of layer instances.

    # Raises
        ValueError: In case the network is not valid (e.g. disconnected
            graph, cycle in the graph, or non-unique layer names).
    """
    # Network_nodes: set of nodes included in the graph of layers
    # (not all nodes included in the layers are relevant to the current graph).
    network_nodes = set()  # ids of all nodes relevant to the Network
    nodes_depths = {}  # dict {node: depth value}
    layers_depths = {}  # dict {layer: depth value}
    layer_indices = {}  # dict {layer: index in traversal}
    nodes_in_decreasing_depth = []

    def build_map(tensor,
                  finished_nodes,
                  nodes_in_progress,
                  layer,
                  node_index,
                  tensor_index):
        """Builds a map of the graph of layers.

        This recursively updates the map `layer_indices`,
        the list `nodes_in_decreasing_depth` and the set `network_nodes`.

        # Arguments:
            tensor: Some tensor in a graph (only used in error messages).
            finished_nodes: Set of nodes whose subgraphs have been traversed
                completely. Useful to prevent duplicated work.
            nodes_in_progress: Set of nodes that are currently active on the
                recursion stack. Useful to detect cycles.
            layer: Layer from which `tensor` comes from.
            node_index: Node index from which `tensor` comes from.
            tensor_index: Tensor_index from which `tensor` comes from.

        # Raises:
            ValueError: if a cycle is detected.
        """
        node = layer._inbound_nodes[node_index]

        # Prevent cycles.
        if node in nodes_in_progress:
            raise ValueError('The tensor ' + str(tensor) + ' at layer "' +
                             layer.name + '" is part of a cycle.')

        # Don't repeat work for shared subgraphs
        if node in finished_nodes:
            return

        node_key = _make_node_key(layer.name, node_index)
        # Update network_nodes.
        network_nodes.add(node_key)

        # Store the traversal order for layer sorting.
        if layer not in layer_indices:
            layer_indices[layer] = len(layer_indices)

        nodes_in_progress.add(node)

        # Propagate to all previous tensors connected to this node.
        for i in range(len(node.inbound_layers)):
            x = node.input_tensors[i]
            layer = node.inbound_layers[i]
            node_index = node.node_indices[i]
            tensor_index = node.tensor_indices[i]
            build_map(x, finished_nodes, nodes_in_progress, layer,
                      node_index, tensor_index)

        finished_nodes.add(node)
        nodes_in_progress.remove(node)

        # Post-order append: a node is listed after all of its ancestors.
        nodes_in_decreasing_depth.append(node)

    finished_nodes = set()
    nodes_in_progress = set()
    for x in outputs:
        layer, node_index, tensor_index = x._keras_history
        build_map(x, finished_nodes, nodes_in_progress,
                  layer=layer,
                  node_index=node_index,
                  tensor_index=tensor_index)

    # Walk from the outputs towards the inputs, assigning depths.
    for node in reversed(nodes_in_decreasing_depth):
        # If the depth is not set, the node has no outbound nodes (depth 0).
        depth = nodes_depths.setdefault(node, 0)

        # Update the depth of the corresponding layer
        previous_depth = layers_depths.get(node.outbound_layer, 0)
        # If we've seen this layer before at a higher depth,
        # we should use that depth instead of the node depth.
        # This is necessary for shared layers that have inputs at different
        # depth levels in the graph.
        depth = max(depth, previous_depth)
        layers_depths[node.outbound_layer] = depth
        nodes_depths[node] = depth

        # Update the depth of inbound nodes.
        # The "depth" of a node is the max of the depths
        # of all layers it is connected to.
        for i in range(len(node.inbound_layers)):
            inbound_layer = node.inbound_layers[i]
            node_index = node.node_indices[i]
            inbound_node = inbound_layer._inbound_nodes[node_index]
            previous_depth = nodes_depths.get(inbound_node, 0)
            nodes_depths[inbound_node] = max(depth + 1, previous_depth)

    # Build a dict {depth: list of nodes with this depth}
    nodes_by_depth = {}
    for node, depth in nodes_depths.items():
        if depth not in nodes_by_depth:
            nodes_by_depth[depth] = []
        nodes_by_depth[depth].append(node)

    # Build a dict {depth: list of layers with this depth}
    layers_by_depth = {}
    for layer, depth in layers_depths.items():
        if depth not in layers_by_depth:
            layers_by_depth[depth] = []
        layers_by_depth[depth].append(layer)

    # Get sorted list of layer depths.
    depth_keys = list(layers_by_depth.keys())
    depth_keys.sort(reverse=True)

    # Set self.layers and self._layers_by_depth.
    layers = []
    for depth in depth_keys:
        layers_for_depth = layers_by_depth[depth]
        # Network.layers needs to have a deterministic order:
        # here we order them by traversal order.
        layers_for_depth.sort(key=lambda x: layer_indices[x])
        layers.extend(layers_for_depth)

    # Get sorted list of node depths.
    depth_keys = list(nodes_by_depth.keys())
    depth_keys.sort(reverse=True)

    # Check that all tensors required are computable.
    # computable_tensors: all tensors in the graph
    # that can be computed from the inputs provided.
    computable_tensors = []
    for x in inputs:
        computable_tensors.append(x)

    layers_with_complete_input = []  # To provide a better error msg.
    for depth in depth_keys:
        for node in nodes_by_depth[depth]:
            layer = node.outbound_layer
            if layer:
                for x in node.input_tensors:
                    if x not in computable_tensors:
                        raise ValueError('Graph disconnected: '
                                         'cannot obtain value for tensor ' +
                                         str(x) + ' at layer "' +
                                         layer.name + '". '
                                         'The following previous layers '
                                         'were accessed without issue: ' +
                                         str(layers_with_complete_input))
                for x in node.output_tensors:
                    computable_tensors.append(x)
                layers_with_complete_input.append(layer.name)

    # Ensure name unicity, which will be crucial for serialization
    # (since serialized nodes refer to layers by their name).
    # Names are counted in a single pass instead of calling
    # `list.count` once per layer (quadratic in the number of layers).
    all_names = [layer.name for layer in layers]
    name_counts = {}
    for name in all_names:
        name_counts[name] = name_counts.get(name, 0) + 1
    for name in all_names:
        if name_counts[name] != 1:
            raise ValueError('The name "' + name + '" is used ' +
                             str(name_counts[name]) +
                             ' times in the model. '
                             'All layer names should be unique.')
    return network_nodes, nodes_by_depth, layers, layers_by_depth
import metrics as metrics_module from ..utils.generic_utils import slice_arrays from ..utils.generic_utils import to_list from ..utils.generic_utils import unpack_singleton from ..legacy import interfaces class Model(Network): """The `Model` class adds training & evaluation routines to a `Network`. """ def compile(self, optimizer, loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, weighted_metrics=None, target_tensors=None, **kwargs): """Configures the model for training. # Arguments optimizer: String (name of optimizer) or optimizer instance. See [optimizers](/optimizers). loss: String (name of objective function) or objective function. See [losses](/losses). If the model has multiple outputs, you can use a different loss on each output by passing a dictionary or a list of losses. The loss value that will be minimized by the model will then be the sum of all individual losses. metrics: List of metrics to be evaluated by the model during training and testing. Typically you will use `metrics=['accuracy']`. To specify different metrics for different outputs of a multi-output model, you could also pass a dictionary, such as `metrics={'output_a': 'accuracy'}`. loss_weights: Optional list or dictionary specifying scalar coefficients (Python floats) to weight the loss contributions of different model outputs. The loss value that will be minimized by the model will then be the *weighted sum* of all individual losses, weighted by the `loss_weights` coefficients. If a list, it is expected to have a 1:1 mapping to the model's outputs. If a tensor, it is expected to map output names (strings) to scalar coefficients. sample_weight_mode: If you need to do timestep-wise sample weighting (2D weights), set this to `"temporal"`. `None` defaults to sample-wise weights (1D). If the model has multiple outputs, you can use a different `sample_weight_mode` on each output by passing a dictionary or a list of modes. 
weighted_metrics: List of metrics to be evaluated and weighted by sample_weight or class_weight during training and testing. target_tensors: By default, Keras will create placeholders for the model's target, which will be fed with the target data during training. If instead you would like to use your own target tensors (in turn, Keras will not expect external Numpy data for these targets at training time), you can specify them via the `target_tensors` argument. It can be a single tensor (for a single-output model), a list of tensors, or a dict mapping output names to target tensors. **kwargs: When using the Theano/CNTK backends, these arguments are passed into `K.function`. When using the TensorFlow backend, these arguments are passed into `tf.Session.run`. # Raises ValueError: In case of invalid arguments for `optimizer`, `loss`, `metrics` or `sample_weight_mode`. """ self.optimizer = optimizers.get(optimizer) self.loss = loss or [] self.metrics = metrics or [] self.loss_weights = loss_weights self.sample_weight_mode = sample_weight_mode self.weighted_metrics = weighted_metrics if not self.built: # Model is not compilable because # it does not know its number of inputs # and outputs, nor their shapes and names. # We will compile after the first # time the model gets called on training data. return self._is_compiled = True # Prepare loss functions. if isinstance(loss, dict): for name in loss: if name not in self.output_names: raise ValueError('Unknown entry in loss ' 'dictionary: "' + name + '". ' 'Only expected the following keys: ' + str(self.output_names)) loss_functions = [] for name in self.output_names: if name not in loss: warnings.warn('Output "' + name + '" missing from loss dictionary. 
' 'We assume this was done on purpose, ' 'and we will not be expecting ' 'any data to be passed to "' + name + '" during training.', stacklevel=2) loss_functions.append(losses.get(loss.get(name))) elif isinstance(loss, list): if len(loss) != len(self.outputs): raise ValueError('When passing a list as loss, ' 'it should have one entry per model outputs. ' 'The model has ' + str(len(self.outputs)) + ' outputs, but you passed loss=' + str(loss)) loss_functions = [losses.get(l) for l in loss] else: loss_function = losses.get(loss) loss_functions = [loss_function for _ in range(len(self.outputs))] self.loss_functions = loss_functions weighted_losses = [ weighted_masked_objective(fn) for fn in loss_functions] skip_target_indices = [] skip_target_weighing_indices = [] self._feed_outputs = [] self._feed_output_names = [] self._feed_output_shapes = [] self._feed_loss_fns = [] for i in range(len(weighted_losses)): if weighted_losses[i] is None: skip_target_indices.append(i) skip_target_weighing_indices.append(i) # Prepare output masks. masks = self.compute_mask(self.inputs, mask=None) if masks is None: masks = [None for _ in self.outputs] masks = to_list(masks) # Prepare loss weights. if loss_weights is None: loss_weights_list = [1. for _ in range(len(self.outputs))] elif isinstance(loss_weights, dict): for name in loss_weights: if name not in self.output_names: raise ValueError('Unknown entry in loss_weights ' 'dictionary: "' + name + '". ' 'Only expected the following keys: ' + str(self.output_names)) loss_weights_list = [] for name in self.output_names: loss_weights_list.append(loss_weights.get(name, 1.)) elif isinstance(loss_weights, list): if len(loss_weights) != len(self.outputs): raise ValueError('When passing a list as loss_weights, ' 'it should have one entry per model output. 
' 'The model has ' + str(len(self.outputs)) + ' outputs, but you passed loss_weights=' + str(loss_weights)) loss_weights_list = loss_weights else: raise TypeError('Could not interpret loss_weights argument: ' + str(loss_weights) + ' - expected a list of dicts.') # Prepare targets of model. self.targets = [] self._feed_targets = [] if target_tensors is not None: if isinstance(target_tensors, list): if len(target_tensors) != len(self.outputs): raise ValueError( 'When passing a list as `target_tensors`, ' 'it should have one entry per model output. ' 'The model has ' + str(len(self.outputs)) + ' outputs, but you passed target_tensors=' + str(target_tensors)) elif isinstance(target_tensors, dict): for name in target_tensors: if name not in self.output_names: raise ValueError('Unknown entry in `target_tensors` ' 'dictionary: "' + name + '". ' 'Only expected the following keys: ' + str(self.output_names)) tmp_target_tensors = [] for name in self.output_names: tmp_target_tensors.append(target_tensors.get(name, None)) target_tensors = tmp_target_tensors elif K.is_tensor(target_tensors): if len(self.outputs) != 1: raise ValueError('The model has ' + str(len(self.outputs)) + ' outputs, but you passed a single tensor as ' '`target_tensors`. 
Expected a list or a dict ' 'of tensors.') target_tensors = [target_tensors] else: raise TypeError('Expected `target_tensors` to be a tensor, ' 'a list of tensors, or dict of tensors, but got:', target_tensors) for i in range(len(self.outputs)): if i in skip_target_indices: self.targets.append(None) else: shape = K.int_shape(self.outputs[i]) name = self.output_names[i] if target_tensors is not None: target = target_tensors[i] else: target = None if target is None or K.is_placeholder(target): if target is None: target = K.placeholder( ndim=len(shape), name=name + '_target', sparse=K.is_sparse(self.outputs[i]), dtype=K.dtype(self.outputs[i])) self._feed_targets.append(target) self._feed_outputs.append(self.outputs[i]) self._feed_output_names.append(name) self._feed_output_shapes.append(shape) self._feed_loss_fns.append(self.loss_functions[i]) else: skip_target_weighing_indices.append(i) self.targets.append(target) # Prepare sample weights. sample_weights = [] sample_weight_modes = [] if isinstance(sample_weight_mode, dict): for name in sample_weight_mode: if name not in self.output_names: raise ValueError('Unknown entry in ' 'sample_weight_mode dictionary: "' + name + '". ' 'Only expected the following keys: ' + str(self.output_names)) for i, name in enumerate(self.output_names): if i in skip_target_weighing_indices: weight = None sample_weight_modes.append(None) else: if name not in sample_weight_mode: raise ValueError('Output "' + name + '" missing from sample_weight_modes ' 'dictionary') if sample_weight_mode.get(name) == 'temporal': weight = K.placeholder(ndim=2, name=name + '_sample_weights') sample_weight_modes.append('temporal') else: weight = K.placeholder(ndim=1, name=name + '_sample_weights') sample_weight_modes.append(None) sample_weights.append(weight) elif isinstance(sample_weight_mode, list): if len(sample_weight_mode) != len(self.outputs): raise ValueError('When passing a list as sample_weight_mode, ' 'it should have one entry per model output. 
' 'The model has ' + str(len(self.outputs)) + ' outputs, but you passed ' 'sample_weight_mode=' + str(sample_weight_mode)) for i in range(len(self.output_names)): if i in skip_target_weighing_indices: weight = None sample_weight_modes.append(None) else: mode = sample_weight_mode[i] name = self.output_names[i] if mode == 'temporal': weight = K.placeholder(ndim=2, name=name + '_sample_weights') sample_weight_modes.append('temporal') else: weight = K.placeholder(ndim=1, name=name + '_sample_weights') sample_weight_modes.append(None) sample_weights.append(weight) else: for i, name in enumerate(self.output_names): if i in skip_target_weighing_indices: sample_weight_modes.append(None) sample_weights.append(None) else: if sample_weight_mode == 'temporal': sample_weights.append( K.placeholder(ndim=2, name=name + '_sample_weights')) sample_weight_modes.append('temporal') else: sample_weights.append( K.placeholder(ndim=1, name=name + '_sample_weights')) sample_weight_modes.append(None) self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] for i in range(len(self.outputs)): if i not in skip_target_weighing_indices: self._feed_sample_weight_modes.append( self.sample_weight_modes[i]) # Prepare metrics. self.metrics_names = ['loss'] self.metrics_tensors = [] # Compute total loss. 
total_loss = None with K.name_scope('loss'): for i in range(len(self.outputs)): if i in skip_target_indices: continue y_true = self.targets[i] y_pred = self.outputs[i] weighted_loss = weighted_losses[i] sample_weight = sample_weights[i] mask = masks[i] loss_weight = loss_weights_list[i] with K.name_scope(self.output_names[i] + '_loss'): output_loss = weighted_loss(y_true, y_pred, sample_weight, mask) if len(self.outputs) > 1: self.metrics_tensors.append(output_loss) self.metrics_names.append(self.output_names[i] + '_loss') if total_loss is None: total_loss = loss_weight * output_loss else: total_loss += loss_weight * output_loss if total_loss is None: if not self.losses: raise ValueError('The model cannot be compiled ' 'because it has no loss to optimize.') else: total_loss = 0. # Add regularization penalties # and other layer-specific losses. for loss_tensor in self.losses: total_loss += loss_tensor # List of same size as output_names. # contains tuples (metrics for output, names of metrics). 
nested_metrics = collect_metrics(metrics, self.output_names) nested_weighted_metrics = collect_metrics(weighted_metrics, self.output_names) self.metrics_updates = [] self.stateful_metric_names = [] self.stateful_metric_functions = [] def handle_metrics(metrics, weights=None): metric_name_prefix = 'weighted_' if weights is not None else '' for metric in metrics: if metric in ('accuracy', 'acc', 'crossentropy', 'ce'): # custom handling of accuracy/crossentropy # (because of class mode duality) output_shape = K.int_shape(self.outputs[i]) if (output_shape[-1] == 1 or self.loss_functions[i] == losses.binary_crossentropy): # case: binary accuracy/crossentropy if metric in ('accuracy', 'acc'): metric_fn = metrics_module.binary_accuracy elif metric in ('crossentropy', 'ce'): metric_fn = metrics_module.binary_crossentropy elif (self.loss_functions[i] == losses.sparse_categorical_crossentropy): # case: categorical accuracy/crossentropy # with sparse targets if metric in ('accuracy', 'acc'): metric_fn = metrics_module.sparse_categorical_accuracy elif metric in ('crossentropy', 'ce'): metric_fn = ( metrics_module.sparse_categorical_crossentropy) else: # case: categorical accuracy/crossentropy if metric in ('accuracy', 'acc'): metric_fn = metrics_module.categorical_accuracy elif metric in ('crossentropy', 'ce'): metric_fn = metrics_module.categorical_crossentropy if metric in ('accuracy', 'acc'): suffix = 'acc' elif metric in ('crossentropy', 'ce'): suffix = 'ce' weighted_metric_fn = weighted_masked_objective(metric_fn) metric_name = metric_name_prefix + suffix else: metric_fn = metrics_module.get(metric) weighted_metric_fn = weighted_masked_objective(metric_fn) # Get metric name as string if hasattr(metric_fn, 'name'): metric_name = metric_fn.name else: metric_name = metric_fn.__name__ metric_name = metric_name_prefix + metric_name with K.name_scope(metric_name): metric_result = weighted_metric_fn(y_true, y_pred, weights=weights, mask=masks[i]) # Append to self.metrics_names, 
self.metric_tensors, # self.stateful_metric_names if len(self.output_names) > 1: metric_name = self.output_names[i] + '_' + metric_name # Dedupe name j = 1 base_metric_name = metric_name while metric_name in self.metrics_names: metric_name = base_metric_name + '_' + str(j) j += 1 self.metrics_names.append(metric_name) self.metrics_tensors.append(metric_result) # Keep track of state updates created by # stateful metrics (i.e. metrics layers). if isinstance(metric_fn, Layer) and metric_fn.stateful: self.stateful_metric_names.append(metric_name) self.stateful_metric_functions.append(metric_fn) self.metrics_updates += metric_fn.updates with K.name_scope('metrics'): for i in range(len(self.outputs)): if i in skip_target_indices: continue y_true = self.targets[i] y_pred = self.outputs[i] weights = sample_weights[i] output_metrics = nested_metrics[i] output_weighted_metrics = nested_weighted_metrics[i] handle_metrics(output_metrics) handle_metrics(output_weighted_metrics, weights=weights) # Prepare gradient updates and state updates. self.total_loss = total_loss self.sample_weights = sample_weights self._feed_sample_weights = [] for i in range(len(self.sample_weights)): if i not in skip_target_weighing_indices: self._feed_sample_weights.append(sample_weights[i]) # Functions for train, test and predict will # be compiled lazily when required. # This saves time when the user is not using all functions. self._function_kwargs = kwargs self.train_function = None self.test_function = None self.predict_function = None # Collected trainable weights, sorted in topological order. trainable_weights = self.trainable_weights self._collected_trainable_weights = trainable_weights def _check_trainable_weights_consistency(self): """Check trainable weights count consistency. This will raise a warning if `trainable_weights` and `_collected_trainable_weights` are inconsistent (i.e. have different number of parameters). 
def _make_train_function(self):
    """Builds `self.train_function` on first use.

    The backend function takes the feedable inputs, targets and sample
    weights (plus the learning phase when it is dynamic) and returns the
    total loss followed by the metric tensors. Running it applies the
    model updates, the optimizer updates and the metric updates.

    Nothing is rebuilt if `self.train_function` is already set.

    # Raises
        RuntimeError: If the model has no `train_function` attribute,
            i.e. it was never compiled.
    """
    if not hasattr(self, 'train_function'):
        raise RuntimeError('You must compile your model before using it.')
    self._check_trainable_weights_consistency()
    if self.train_function is not None:
        # Already built by a previous call.
        return

    feed_tensors = (self._feed_inputs +
                    self._feed_targets +
                    self._feed_sample_weights)
    if self._uses_dynamic_learning_phase():
        feed_tensors = feed_tensors + [K.learning_phase()]

    with K.name_scope('training'):
        optimizer_scope = self.optimizer.__class__.__name__
        with K.name_scope(optimizer_scope):
            optimizer_updates = self.optimizer.get_updates(
                params=self._collected_trainable_weights,
                loss=self.total_loss)
        all_updates = (self.updates +
                       optimizer_updates +
                       self.metrics_updates)
        fetches = [self.total_loss] + self.metrics_tensors
        # Gets loss and metrics. Updates weights at each call.
        self.train_function = K.function(
            feed_tensors,
            fetches,
            updates=all_updates,
            name='train_function',
            **self._function_kwargs)
def _set_inputs(self, inputs, outputs=None, training=None):
    """Set model's input and output specs based on the input data received.

    This is to be used for Model subclasses, which do not know at
    instantiation time what their inputs look like.

    # Arguments
        inputs: Single array, or list of arrays. The arrays could be
            placeholders, Numpy arrays, or data tensors.
            - if placeholders: the model is built on top of these
                placeholders, and we expect Numpy data to be fed for
                them when calling `fit`/etc.
            - if Numpy data: we create placeholders matching the shape
                of the Numpy arrays. We expect Numpy data to be fed for
                these placeholders when calling `fit`/etc.
            - if data tensors: the model is built on top of these
                tensors. We do not expect any Numpy data to be provided
                when calling `fit`/etc.
        outputs: Optional output tensors (if already computed by
            running the model).
        training: Boolean or None. Only relevant in symbolic mode.
            Specifies whether to build the model's graph in inference
            mode (False), training mode (True), or using the Keras
            learning phase (None).

    # Raises
        ValueError: If the model inputs have already been set.
    """
    if self.__class__.__name__ == 'Sequential':
        # The class name is compared instead of using `isinstance`
        # because `Sequential` itself depends on `Model`.
        if isinstance(inputs, list):
            assert len(inputs) == 1
            inputs = inputs[0]
        batch_free_shape = (None,) + inputs.shape[1:]
        self.build(input_shape=batch_free_shape)
        return

    if self.inputs:
        raise ValueError('Model inputs are already set.')

    # Symbolic model inputs are set on the fly: provided tensors are used
    # as they are, Numpy data gets a freshly created placeholder.
    self.inputs = []
    self.input_names = []
    self._feed_inputs = []
    self._feed_input_names = []
    self._feed_input_shapes = []

    for position, value in enumerate(to_list(inputs, allow_tuple=True), 1):
        name = 'input_%d' % position
        self.input_names.append(name)
        if isinstance(value, list):
            value = np.asarray(value)
            if value.ndim == 1:
                value = np.expand_dims(value, 1)

        if not isinstance(value, np.ndarray):
            # Assumed tensor - TODO(fchollet) additional type check?
            self.inputs.append(value)
            if K.is_placeholder(value):
                self._feed_inputs.append(value)
                self._feed_input_names.append(name)
                self._feed_input_shapes.append(K.int_shape(value))
            continue

        # Every dimension but the batch size is fixed from the data.
        # This is suboptimal, but it is the best that can be done with
        # the available info. Call `model._set_inputs(placeholders)`
        # to specify custom placeholders if the need arises.
        shape = (None,) + value.shape[1:]
        placeholder = K.placeholder(shape=shape, name=name)
        self.inputs.append(placeholder)
        self._feed_inputs.append(placeholder)
        self._feed_input_names.append(name)
        self._feed_input_shapes.append(shape)

    if outputs is None:
        # Obtain symbolic outputs by calling the model.
        model_inputs = unpack_singleton(self.inputs)
        if self._expects_training_arg:
            outputs = self.call(model_inputs, training=training)
        else:
            outputs = self.call(model_inputs)
    self.outputs = to_list(outputs, allow_tuple=True)
    self.output_names = [
        'output_%d' % index
        for index in range(1, len(self.outputs) + 1)]
    self.built = True
' 'You passed: y=' + str(y)) elif isinstance(y, dict): raise ValueError('Please do not pass a dictionary ' 'as model targets.') else: if not isinstance(y, np.ndarray) and not K.is_tensor(y): raise ValueError('Please provide as model targets ' 'either a single ' 'array or a list of arrays. ' 'You passed: y=' + str(y)) # Typecheck that all inputs are *either* value *or* symbolic. if y is not None: all_inputs += to_list(y, allow_tuple=True) if any(K.is_tensor(v) for v in all_inputs): if not all(K.is_tensor(v) for v in all_inputs): raise ValueError('Do not pass inputs that mix Numpy ' 'arrays and symbolic tensors. ' 'You passed: x=' + str(x) + '; y=' + str(y)) # Handle target tensors if any passed. y = to_list(y, allow_tuple=True) target_tensors = [v for v in y if K.is_tensor(v)] if not target_tensors: target_tensors = None self.compile(optimizer=self.optimizer, loss=self.loss, metrics=self.metrics, loss_weights=self.loss_weights, target_tensors=target_tensors) # If `x` and `y` were all symbolic, # then the model should not be fed any inputs and targets. # Note: in this case, `any` and `all` are equivalent since we disallow # mixed symbolic/value inputs. if any(K.is_tensor(v) for v in all_inputs): return [], [], [] # What follows is input validation and standardization to list format, # in the case where all inputs are value arrays. if not self._is_graph_network: # Case: symbolic-mode subclassed network. # Do not do shape validation. feed_input_names = self._feed_input_names feed_input_shapes = None else: # Case: symbolic-mode graph network. # In this case, we run extensive shape validation checks. feed_input_names = self._feed_input_names feed_input_shapes = self._feed_input_shapes # Standardize the inputs. x = standardize_input_data( x, feed_input_names, feed_input_shapes, check_batch_axis=False, # Don't enforce the batch size. 
exception_prefix='input') if y is not None: if not self._is_graph_network: feed_output_names = self._feed_output_names feed_output_shapes = None # Sample weighting not supported in this case. # TODO: consider supporting it. feed_sample_weight_modes = [None for _ in self.outputs] else: feed_output_names = self._feed_output_names feed_sample_weight_modes = self._feed_sample_weight_modes feed_output_shapes = [] for output_shape, loss_fn in zip(self._feed_output_shapes, self._feed_loss_fns): if loss_fn is losses.sparse_categorical_crossentropy: if K.image_data_format() == 'channels_first' and len( output_shape) in [4, 5]: feed_output_shapes.append( (output_shape[0], 1) + output_shape[2:]) else: feed_output_shapes.append(output_shape[:-1] + (1,)) elif (not hasattr(loss_fn, '__name__') or getattr(losses, loss_fn.__name__, None) is None): # If `loss_fn` is not a function (e.g. callable class) # or if it not in the `losses` module, then # it is a user-defined loss and we make no assumptions # about it. feed_output_shapes.append(None) else: feed_output_shapes.append(output_shape) # Standardize the outputs. y = standardize_input_data( y, feed_output_names, feed_output_shapes, check_batch_axis=False, # Don't enforce the batch size. exception_prefix='target') # Generate sample-wise weight values given the `sample_weight` and # `class_weight` arguments. sample_weights = standardize_sample_weights( sample_weight, feed_output_names) class_weights = standardize_class_weights( class_weight, feed_output_names) sample_weights = [ standardize_weights(ref, sw, cw, mode) for (ref, sw, cw, mode) in zip(y, sample_weights, class_weights, feed_sample_weight_modes) ] # Check that all arrays have the same length. check_array_length_consistency(x, y, sample_weights) if self._is_graph_network: # Additional checks to avoid users mistakenly # using improper loss fns. 
def fit(self,
        x=None,
        y=None,
        batch_size=None,
        epochs=1,
        verbose=1,
        callbacks=None,
        validation_split=0.,
        validation_data=None,
        shuffle=True,
        class_weight=None,
        sample_weight=None,
        initial_epoch=0,
        steps_per_epoch=None,
        validation_steps=None,
        **kwargs):
    """Trains the model for a given number of epochs (iterations on a dataset).

    # Arguments
        x: Numpy array of training data (if the model has a single input),
            or list of Numpy arrays (if the model has multiple inputs).
            If input layers in the model are named, you can also pass a
            dictionary mapping input names to Numpy arrays.
            `x` can be `None` (default) if feeding from
            framework-native tensors (e.g. TensorFlow data tensors).
        y: Numpy array of target (label) data
            (if the model has a single output),
            or list of Numpy arrays (if the model has multiple outputs).
            If output layers in the model are named, you can also pass a
            dictionary mapping output names to Numpy arrays.
            `y` can be `None` (default) if feeding from
            framework-native tensors (e.g. TensorFlow data tensors).
        batch_size: Integer or `None`.
            Number of samples per gradient update.
            If both `batch_size` and `steps_per_epoch` are unspecified,
            `batch_size` will default to 32.
        epochs: Integer. Number of epochs to train the model.
            An epoch is an iteration over the entire `x` and `y`
            data provided.
            Note that in conjunction with `initial_epoch`,
            `epochs` is to be understood as "final epoch".
            The model is not trained for a number of iterations
            given by `epochs`, but merely until the epoch
            of index `epochs` is reached.
        verbose: Integer. 0, 1, or 2. Verbosity mode.
            0 = silent, 1 = progress bar, 2 = one line per epoch.
        callbacks: List of `keras.callbacks.Callback` instances.
            List of callbacks to apply during training.
            See [callbacks](/callbacks).
        validation_split: Float between 0 and 1.
            Fraction of the training data to be used as validation data.
            The model will set apart this fraction of the training data,
            will not train on it, and will evaluate
            the loss and any model metrics
            on this data at the end of each epoch.
            The validation data is selected from the last samples
            in the `x` and `y` data provided, before shuffling.
        validation_data: tuple `(x_val, y_val)` or tuple
            `(x_val, y_val, val_sample_weights)` on which to evaluate
            the loss and any model metrics at the end of each epoch.
            The model will not be trained on this data.
            `validation_data` will override `validation_split`.
        shuffle: Boolean (whether to shuffle the training data
            before each epoch) or str (for 'batch').
            'batch' is a special option for dealing with the
            limitations of HDF5 data; it shuffles in batch-sized chunks.
            Has no effect when `steps_per_epoch` is not `None`.
        class_weight: Optional dictionary mapping class indices (integers)
            to a weight (float) value, used for weighting the loss function
            (during training only).
            This can be useful to tell the model to
            "pay more attention" to samples from
            an under-represented class.
        sample_weight: Optional Numpy array of weights for
            the training samples, used for weighting the loss function
            (during training only). You can either pass a flat (1D)
            Numpy array with the same length as the input samples
            (1:1 mapping between weights and samples),
            or in the case of temporal data,
            you can pass a 2D array with shape
            `(samples, sequence_length)`,
            to apply a different weight to every timestep of every sample.
            In this case you should make sure to specify
            `sample_weight_mode="temporal"` in `compile()`.
        initial_epoch: Integer.
            Epoch at which to start training
            (useful for resuming a previous training run).
        steps_per_epoch: Integer or `None`.
            Total number of steps (batches of samples)
            before declaring one epoch finished and starting the
            next epoch. When training with input tensors such as
            TensorFlow data tensors, the default `None` is equal to
            the number of samples in your dataset divided by
            the batch size, or 1 if that cannot be determined.
        validation_steps: Only relevant if `steps_per_epoch`
            is specified. Total number of steps (batches of samples)
            to validate before stopping.
        **kwargs: Only the legacy `nb_epoch` argument is accepted
            (it overrides `epochs` and triggers a warning).

    # Returns
        A `History` object. Its `History.history` attribute is
        a record of training loss values and metrics values
        at successive epochs, as well as validation loss values
        and validation metrics values (if applicable).

    # Raises
        RuntimeError: If the model was never compiled.
        TypeError: If unrecognized keyword arguments are passed.
        ValueError: In case of mismatch between the provided input data
            and what the model expects.
    """
    # Backwards compatibility
    if batch_size is None and steps_per_epoch is None:
        batch_size = 32
    # Legacy support
    if 'nb_epoch' in kwargs:
        warnings.warn('The `nb_epoch` argument in `fit` '
                      'has been renamed `epochs`.', stacklevel=2)
        epochs = kwargs.pop('nb_epoch')
    if kwargs:
        raise TypeError('Unrecognized keyword arguments: ' + str(kwargs))
    if x is None and y is None and steps_per_epoch is None:
        raise ValueError('If fitting from data tensors, '
                         'you should specify the `steps_per_epoch` '
                         'argument.')
    # Validate user data.
    x, y, sample_weights = self._standardize_user_data(
        x, y,
        sample_weight=sample_weight,
        class_weight=class_weight,
        batch_size=batch_size)
    # Prepare validation data.
    do_validation = False
    if validation_data:
        do_validation = True
        if len(validation_data) == 2:
            val_x, val_y = validation_data
            val_sample_weight = None
        elif len(validation_data) == 3:
            val_x, val_y, val_sample_weight = validation_data
        else:
            raise ValueError('When passing validation_data, '
                             'it must contain 2 (x_val, y_val) '
                             'or 3 (x_val, y_val, val_sample_weights) '
                             'items, however it contains %d items' %
                             len(validation_data))

        val_x, val_y, val_sample_weights = self._standardize_user_data(
            val_x, val_y,
            sample_weight=val_sample_weight,
            batch_size=batch_size)
        if self._uses_dynamic_learning_phase():
            val_ins = val_x + val_y + val_sample_weights + [0.]
        else:
            val_ins = val_x + val_y + val_sample_weights

    elif validation_split and 0. < validation_split < 1.:
        if any(K.is_tensor(t) for t in x):
            raise ValueError(
                'If your data is in the form of symbolic tensors, '
                'you cannot use `validation_split`.')
        do_validation = True
        if hasattr(x[0], 'shape'):
            split_at = int(int(x[0].shape[0]) * (1. - validation_split))
        else:
            split_at = int(len(x[0]) * (1. - validation_split))
        x, val_x = (slice_arrays(x, 0, split_at),
                    slice_arrays(x, split_at))
        y, val_y = (slice_arrays(y, 0, split_at),
                    slice_arrays(y, split_at))
        sample_weights, val_sample_weights = (
            slice_arrays(sample_weights, 0, split_at),
            slice_arrays(sample_weights, split_at))
        if self._uses_dynamic_learning_phase():
            val_ins = val_x + val_y + val_sample_weights + [0.]
        else:
            val_ins = val_x + val_y + val_sample_weights

    elif validation_steps:
        do_validation = True
        # No validation arrays are fed in this branch; the only possible
        # feed is the learning-phase flag. `val_ins` must be bound in both
        # cases, otherwise the `fit_loop` call below fails with an
        # UnboundLocalError for models without a dynamic learning phase.
        if self._uses_dynamic_learning_phase():
            val_ins = [0.]
        else:
            val_ins = []

    # Prepare input arrays and training function.
    if self._uses_dynamic_learning_phase():
        ins = x + y + sample_weights + [1.]
    else:
        ins = x + y + sample_weights

    self._make_train_function()
    f = self.train_function

    # Prepare display labels.
    out_labels = self.metrics_names

    if do_validation:
        self._make_test_function()
        val_f = self.test_function
        callback_metrics = copy.copy(out_labels) + [
            'val_' + n for n in out_labels]
    else:
        callback_metrics = copy.copy(out_labels)
        val_f = None
        val_ins = []

    # Delegate logic to `fit_loop`.
    return training_arrays.fit_loop(self, f, ins,
                                    out_labels=out_labels,
                                    batch_size=batch_size,
                                    epochs=epochs,
                                    verbose=verbose,
                                    callbacks=callbacks,
                                    val_f=val_f,
                                    val_ins=val_ins,
                                    shuffle=shuffle,
                                    callback_metrics=callback_metrics,
                                    initial_epoch=initial_epoch,
                                    steps_per_epoch=steps_per_epoch,
                                    validation_steps=validation_steps)
def predict(self, x,
            batch_size=None,
            verbose=0,
            steps=None):
    """Generates output predictions for the input samples.

    Computation is done in batches.

    # Arguments
        x: The input data, as a Numpy array
            (or list of Numpy arrays if the model has multiple inputs).
        batch_size: Integer. If both `batch_size` and `steps` are
            unspecified, it will default to 32.
        verbose: Verbosity mode, 0 or 1.
        steps: Total number of steps (batches of samples)
            before declaring the prediction round finished.
            Ignored with the default value of `None`.

    # Returns
        Numpy array(s) of predictions.

    # Raises
        ValueError: In case of mismatch between the provided
            input data and the model's expectations,
            or in case a stateful model receives a number of samples
            that is not a multiple of the batch size.
    """
    # Backwards compatibility.
    if batch_size is None and steps is None:
        batch_size = 32

    if x is None and steps is None:
        raise ValueError('If predicting from data tensors, '
                         'you should specify the `steps` '
                         'argument.')
    # Validate user data.
    x, _, _ = self._standardize_user_data(x)
    # The divisibility check only makes sense when there are arrays to feed
    # and a batch size is known. `x` is empty when the standardization step
    # returns no feedable arrays, and `batch_size` stays `None` when only
    # `steps` was given; indexing `x[0]` or comparing against `None` would
    # otherwise fail before any prediction is run.
    if self.stateful and batch_size is not None and x:
        num_samples = x[0].shape[0]
        if num_samples > batch_size and num_samples % batch_size != 0:
            raise ValueError('In a stateful network, '
                             'you should only pass inputs with '
                             'a number of samples that can be '
                             'divided by the batch size. Found: ' +
                             str(num_samples) + ' samples. '
                             'Batch size: ' + str(batch_size) + '.')

    # Prepare inputs, delegate logic to `predict_loop`.
    if self._uses_dynamic_learning_phase():
        ins = x + [0.]
    else:
        ins = x
    self._make_predict_function()
    f = self.predict_function
    return training_arrays.predict_loop(self, f, ins,
                                        batch_size=batch_size,
                                        verbose=verbose,
                                        steps=steps)
def test_on_batch(self, x, y, sample_weight=None):
    """Test the model on a single batch of samples.

    # Arguments
        x: Numpy array of test data,
            or list of Numpy arrays if the model has multiple inputs.
            If all inputs in the model are named,
            you can also pass a dictionary
            mapping input names to Numpy arrays.
        y: Numpy array of target data,
            or list of Numpy arrays if the model has multiple outputs.
            If all outputs in the model are named,
            you can also pass a dictionary
            mapping output names to Numpy arrays.
        sample_weight: Optional array of the same length as x, containing
            weights to apply to the model's loss for each sample.
            In the case of temporal data, you can pass a 2D array
            with shape (samples, sequence_length),
            to apply a different weight to every timestep of every sample.
            In this case you should make sure to specify
            sample_weight_mode="temporal" in compile().

    # Returns
        Scalar test loss (if the model has a single output and no metrics)
        or list of scalars (if the model has multiple outputs
        and/or metrics). The attribute `model.metrics_names` will give you
        the display labels for the scalar outputs.
    """
    x, y, sample_weights = self._standardize_user_data(
        x, y, sample_weight=sample_weight)
    feed_values = x + y + sample_weights
    if self._uses_dynamic_learning_phase():
        # A trailing 0. is fed as the learning-phase value.
        feed_values = feed_values + [0.]
    self._make_test_function()
    return unpack_singleton(self.test_function(feed_values))
For example, the last batch of the epoch is commonly smaller than the others, if the size of the dataset is not divisible by the batch size. The generator is expected to loop over its data indefinitely. An epoch finishes when `steps_per_epoch` batches have been seen by the model. steps_per_epoch: Integer. Total number of steps (batches of samples) to yield from `generator` before declaring one epoch finished and starting the next epoch. It should typically be equal to the number of samples of your dataset divided by the batch size. Optional for `Sequence`: if unspecified, will use the `len(generator)` as a number of steps. epochs: Integer. Number of epochs to train the model. An epoch is an iteration over the entire data provided, as defined by `steps_per_epoch`. Note that in conjunction with `initial_epoch`, `epochs` is to be understood as "final epoch". The model is not trained for a number of iterations given by `epochs`, but merely until the epoch of index `epochs` is reached. verbose: Integer. 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch. callbacks: List of `keras.callbacks.Callback` instances. List of callbacks to apply during training. See [callbacks](/callbacks). validation_data: This can be either - a generator or a `Sequence` object for the validation data - tuple `(x_val, y_val)` - tuple `(x_val, y_val, val_sample_weights)` on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. validation_steps: Only relevant if `validation_data` is a generator. Total number of steps (batches of samples) to yield from `validation_data` generator before stopping at the end of every epoch. It should typically be equal to the number of samples of your validation dataset divided by the batch size. Optional for `Sequence`: if unspecified, will use the `len(validation_data)` as a number of steps. 
class_weight: Optional dictionary mapping class indices (integers) to a weight (float) value, used for weighting the loss function (during training only). This can be useful to tell the model to "pay more attention" to samples from an under-represented class. max_queue_size: Integer. Maximum size for the generator queue. If unspecified, `max_queue_size` will default to 10. workers: Integer. Maximum number of processes to spin up when using process-based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread. use_multiprocessing: Boolean. If `True`, use process-based threading. If unspecified, `use_multiprocessing` will default to `False`. Note that because this implementation relies on multiprocessing, you should not pass non-picklable arguments to the generator as they can't be passed easily to children processes. shuffle: Boolean. Whether to shuffle the order of the batches at the beginning of each epoch. Only used with instances of `Sequence` (`keras.utils.Sequence`). Has no effect when `steps_per_epoch` is not `None`. initial_epoch: Integer. Epoch at which to start training (useful for resuming a previous training run). # Returns A `History` object. Its `History.history` attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). # Raises ValueError: In case the generator yields data in an invalid format. 
# Example ```python def generate_arrays_from_file(path): while True: with open(path) as f: for line in f: # create numpy arrays of input data # and labels, from each line in the file x1, x2, y = process_line(line) yield ({'input_1': x1, 'input_2': x2}, {'output': y}) model.fit_generator(generate_arrays_from_file('/my_file.txt'), steps_per_epoch=10000, epochs=10) ``` """ return training_generator.fit_generator( self, generator, steps_per_epoch=steps_per_epoch, epochs=epochs, verbose=verbose, callbacks=callbacks, validation_data=validation_data, validation_steps=validation_steps, class_weight=class_weight, max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing, shuffle=shuffle, initial_epoch=initial_epoch) @interfaces.legacy_generator_methods_support def evaluate_generator(self, generator, steps=None, max_queue_size=10, workers=1, use_multiprocessing=False, verbose=0): """Evaluates the model on a data generator. The generator should return the same kind of data as accepted by `test_on_batch`. # Arguments generator: Generator yielding tuples (inputs, targets) or (inputs, targets, sample_weights) or an instance of Sequence (keras.utils.Sequence) object in order to avoid duplicate data when using multiprocessing. steps: Total number of steps (batches of samples) to yield from `generator` before stopping. Optional for `Sequence`: if unspecified, will use the `len(generator)` as a number of steps. max_queue_size: maximum size for the generator queue workers: Integer. Maximum number of processes to spin up when using process based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread. use_multiprocessing: if True, use process based threading. Note that because this implementation relies on multiprocessing, you should not pass non picklable arguments to the generator as they can't be passed easily to children processes. verbose: verbosity mode, 0 or 1. 
# Returns Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. # Raises ValueError: In case the generator yields data in an invalid format. """ return training_generator.evaluate_generator( self, generator, steps=steps, max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing, verbose=verbose) @interfaces.legacy_generator_methods_support def predict_generator(self, generator, steps=None, max_queue_size=10, workers=1, use_multiprocessing=False, verbose=0): """Generates predictions for the input samples from a data generator. The generator should return the same kind of data as accepted by `predict_on_batch`. # Arguments generator: Generator yielding batches of input samples or an instance of Sequence (keras.utils.Sequence) object in order to avoid duplicate data when using multiprocessing. steps: Total number of steps (batches of samples) to yield from `generator` before stopping. Optional for `Sequence`: if unspecified, will use the `len(generator)` as a number of steps. max_queue_size: Maximum size for the generator queue. workers: Integer. Maximum number of processes to spin up when using process based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread. use_multiprocessing: If `True`, use process based threading. Note that because this implementation relies on multiprocessing, you should not pass non picklable arguments to the generator as they can't be passed easily to children processes. verbose: verbosity mode, 0 or 1. # Returns Numpy array(s) of predictions. # Raises ValueError: In case the generator yields data in an invalid format. 
""" return training_generator.predict_generator( self, generator, steps=steps, max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing, verbose=verbose) Keras-2.2.4/LICENSE0000644000000000116100000000312013240665765013340 0ustar rooteng00000000000000COPYRIGHT All contributions by François Chollet: Copyright (c) 2015 - 2018, François Chollet. All rights reserved. All contributions by Google: Copyright (c) 2015 - 2018, Google, Inc. All rights reserved. All contributions by Microsoft: Copyright (c) 2017 - 2018, Microsoft, Inc. All rights reserved. All other contributions: Copyright (c) 2015 - 2018, the respective contributors. All rights reserved. Each contributor holds copyright over their respective contributions. The project versioning (Git) records all such contribution source information. LICENSE The MIT License (MIT) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
Keras-2.2.4/tests/0000755000000000116100000000000013355226624013473 5ustar rooteng00000000000000Keras-2.2.4/tests/keras/0000755000000000116100000000000013355226624014600 5ustar rooteng00000000000000Keras-2.2.4/tests/keras/activations_test.py0000644000000000116100000001542713354530144020540 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose from keras import backend as K from keras import activations from keras.layers.core import Dense def get_standard_values(): """A set of floats used for testing the activations. """ return np.array([[0, 0.1, 0.5, 0.9, 1.0]], dtype=K.floatx()) def test_serialization(): all_activations = ['softmax', 'relu', 'elu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear', 'softplus', 'softsign', 'selu'] for name in all_activations: fn = activations.get(name) ref_fn = getattr(activations, name) assert fn == ref_fn config = activations.serialize(fn) fn = activations.deserialize(config) assert fn == ref_fn def test_get_fn(): """Activations has a convenience "get" function. All paths of this function are tested here, although the behaviour in some instances seems potentially surprising (e.g. situation 3) """ # 1. Default returns linear a = activations.get(None) assert a == activations.linear # 2. Passing in a layer raises a warning layer = Dense(32) with pytest.warns(UserWarning): a = activations.get(layer) # 3. Callables return themselves for some reason a = activations.get(lambda x: 5) assert a(None) == 5 # 4. Anything else is not a valid argument with pytest.raises(ValueError): a = activations.get(6) def test_softmax_valid(): """Test using a reference implementation of softmax. 
""" def softmax(values): m = np.max(values) e = np.exp(values - m) return e / np.sum(e) x = K.placeholder(ndim=2) f = K.function([x], [activations.softmax(x)]) test_values = get_standard_values() result = f([test_values])[0] expected = softmax(test_values) assert_allclose(result, expected, rtol=1e-05) def test_softmax_invalid(): """Test for the expected exception behaviour on invalid input """ x = K.placeholder(ndim=1) # One dimensional arrays are supposed to raise a value error with pytest.raises(ValueError): f = K.function([x], [activations.softmax(x)]) def test_softmax_3d(): """Test using a reference implementation of softmax. """ def softmax(values, axis): m = np.max(values, axis=axis, keepdims=True) e = np.exp(values - m) return e / np.sum(e, axis=axis, keepdims=True) x = K.placeholder(ndim=3) f = K.function([x], [activations.softmax(x, axis=1)]) test_values = get_standard_values()[:, :, np.newaxis].copy() result = f([test_values])[0] expected = softmax(test_values, axis=1) assert_allclose(result, expected, rtol=1e-05) def test_time_distributed_softmax(): x = K.placeholder(shape=(1, 1, 5)) f = K.function([x], [activations.softmax(x)]) test_values = get_standard_values() test_values = np.reshape(test_values, (1, 1, np.size(test_values))) f([test_values])[0] def test_softplus(): """Test using a reference softplus implementation. """ def softplus(x): return np.log(np.ones_like(x) + np.exp(x)) x = K.placeholder(ndim=2) f = K.function([x], [activations.softplus(x)]) test_values = get_standard_values() result = f([test_values])[0] expected = softplus(test_values) assert_allclose(result, expected, rtol=1e-05) def test_softsign(): """Test using a reference softsign implementation. 
""" def softsign(x): return np.divide(x, np.ones_like(x) + np.absolute(x)) x = K.placeholder(ndim=2) f = K.function([x], [activations.softsign(x)]) test_values = get_standard_values() result = f([test_values])[0] expected = softsign(test_values) assert_allclose(result, expected, rtol=1e-05) def test_sigmoid(): """Test using a numerically stable reference sigmoid implementation. """ def ref_sigmoid(x): if x >= 0: return 1 / (1 + np.exp(-x)) else: z = np.exp(x) return z / (1 + z) sigmoid = np.vectorize(ref_sigmoid) x = K.placeholder(ndim=2) f = K.function([x], [activations.sigmoid(x)]) test_values = get_standard_values() result = f([test_values])[0] expected = sigmoid(test_values) assert_allclose(result, expected, rtol=1e-05) def test_hard_sigmoid(): """Test using a reference hard sigmoid implementation. """ def ref_hard_sigmoid(x): x = (x * 0.2) + 0.5 z = 0.0 if x <= 0 else (1.0 if x >= 1 else x) return z hard_sigmoid = np.vectorize(ref_hard_sigmoid) x = K.placeholder(ndim=2) f = K.function([x], [activations.hard_sigmoid(x)]) test_values = get_standard_values() result = f([test_values])[0] expected = hard_sigmoid(test_values) assert_allclose(result, expected, rtol=1e-05) def test_relu(): x = K.placeholder(ndim=2) f = K.function([x], [activations.relu(x)]) test_values = get_standard_values() result = f([test_values])[0] assert_allclose(result, test_values, rtol=1e-05) # Test max_value test_values = np.array([[0.5, 1.5]], dtype=K.floatx()) f = K.function([x], [activations.relu(x, max_value=1.)]) result = f([test_values])[0] assert np.max(result) <= 1. # Test max_value == 6. test_values = np.array([[0.5, 6.]], dtype=K.floatx()) f = K.function([x], [activations.relu(x, max_value=1.)]) result = f([test_values])[0] assert np.max(result) <= 6. 
def test_elu(): x = K.placeholder(ndim=2) f = K.function([x], [activations.elu(x, 0.5)]) test_values = get_standard_values() result = f([test_values])[0] assert_allclose(result, test_values, rtol=1e-05) negative_values = np.array([[-1, -2]], dtype=K.floatx()) result = f([negative_values])[0] true_result = (np.exp(negative_values) - 1) / 2 assert_allclose(result, true_result) def test_selu(): x = K.placeholder(ndim=2) f = K.function([x], [activations.selu(x)]) alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 positive_values = get_standard_values() result = f([positive_values])[0] assert_allclose(result, positive_values * scale, rtol=1e-05) negative_values = np.array([[-1, -2]], dtype=K.floatx()) result = f([negative_values])[0] true_result = (np.exp(negative_values) - 1) * scale * alpha assert_allclose(result, true_result) def test_tanh(): test_values = get_standard_values() x = K.placeholder(ndim=2) exp = activations.tanh(x) f = K.function([x], [exp]) result = f([test_values])[0] expected = np.tanh(test_values) assert_allclose(result, expected, rtol=1e-05) def test_linear(): xs = [1, 5, True, None] for x in xs: assert(x == activations.linear(x)) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/wrappers/0000755000000000116100000000000013355226624016443 5ustar rooteng00000000000000Keras-2.2.4/tests/keras/wrappers/scikit_learn_test.py0000644000000000116100000001311713342055016022515 0ustar rooteng00000000000000import pytest import numpy as np from keras.utils.test_utils import get_test_data from keras.models import Sequential from keras.layers.core import Dense, Activation from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor input_dim = 5 hidden_dims = 5 num_train = 100 num_test = 50 num_classes = 3 batch_size = 32 epochs = 1 verbosity = 0 optim = 'adam' loss = 'categorical_crossentropy' np.random.seed(42) (X_train, y_train), (X_test, y_test) = get_test_data( num_train=num_train, 
num_test=num_test, input_shape=(input_dim,), classification=True, num_classes=num_classes) def build_fn_clf(hidden_dims): model = Sequential() model.add(Dense(input_dim, input_shape=(input_dim,))) model.add(Activation('relu')) model.add(Dense(hidden_dims)) model.add(Activation('relu')) model.add(Dense(num_classes)) model.add(Activation('softmax')) model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy']) return model def test_classify_build_fn(): clf = KerasClassifier( build_fn=build_fn_clf, hidden_dims=hidden_dims, batch_size=batch_size, epochs=epochs) assert_classification_works(clf) assert_string_classification_works(clf) def test_classify_class_build_fn(): class ClassBuildFnClf(object): def __call__(self, hidden_dims): return build_fn_clf(hidden_dims) clf = KerasClassifier( build_fn=ClassBuildFnClf(), hidden_dims=hidden_dims, batch_size=batch_size, epochs=epochs) assert_classification_works(clf) assert_string_classification_works(clf) def test_classify_inherit_class_build_fn(): class InheritClassBuildFnClf(KerasClassifier): def __call__(self, hidden_dims): return build_fn_clf(hidden_dims) clf = InheritClassBuildFnClf( build_fn=None, hidden_dims=hidden_dims, batch_size=batch_size, epochs=epochs) assert_classification_works(clf) assert_string_classification_works(clf) def assert_classification_works(clf): clf.fit(X_train, y_train, sample_weight=np.ones(X_train.shape[0]), batch_size=batch_size, epochs=epochs) score = clf.score(X_train, y_train, batch_size=batch_size) assert np.isscalar(score) and np.isfinite(score) preds = clf.predict(X_test, batch_size=batch_size) assert preds.shape == (num_test, ) for prediction in np.unique(preds): assert prediction in range(num_classes) proba = clf.predict_proba(X_test, batch_size=batch_size) assert proba.shape == (num_test, num_classes) assert np.allclose(np.sum(proba, axis=1), np.ones(num_test)) def assert_string_classification_works(clf): string_classes = ['cls{}'.format(x) for x in 
range(num_classes)] str_y_train = np.array(string_classes)[y_train] clf.fit(X_train, str_y_train, batch_size=batch_size, epochs=epochs) score = clf.score(X_train, str_y_train, batch_size=batch_size) assert np.isscalar(score) and np.isfinite(score) preds = clf.predict(X_test, batch_size=batch_size) assert preds.shape == (num_test, ) for prediction in np.unique(preds): assert prediction in string_classes proba = clf.predict_proba(X_test, batch_size=batch_size) assert proba.shape == (num_test, num_classes) assert np.allclose(np.sum(proba, axis=1), np.ones(num_test)) def build_fn_reg(hidden_dims=50): model = Sequential() model.add(Dense(input_dim, input_shape=(input_dim,))) model.add(Activation('relu')) model.add(Dense(hidden_dims)) model.add(Activation('relu')) model.add(Dense(1)) model.add(Activation('linear')) model.compile(optimizer='sgd', loss='mean_absolute_error', metrics=['accuracy']) return model def test_regression_build_fn(): reg = KerasRegressor( build_fn=build_fn_reg, hidden_dims=hidden_dims, batch_size=batch_size, epochs=epochs) assert_regression_works(reg) def test_regression_class_build_fn(): class ClassBuildFnReg(object): def __call__(self, hidden_dims): return build_fn_reg(hidden_dims) reg = KerasRegressor( build_fn=ClassBuildFnReg(), hidden_dims=hidden_dims, batch_size=batch_size, epochs=epochs) assert_regression_works(reg) def test_regression_inherit_class_build_fn(): class InheritClassBuildFnReg(KerasRegressor): def __call__(self, hidden_dims): return build_fn_reg(hidden_dims) reg = InheritClassBuildFnReg( build_fn=None, hidden_dims=hidden_dims, batch_size=batch_size, epochs=epochs) assert_regression_works(reg) def assert_regression_works(reg): reg.fit(X_train, y_train, batch_size=batch_size, epochs=epochs) score = reg.score(X_train, y_train, batch_size=batch_size) assert np.isscalar(score) and np.isfinite(score) preds = reg.predict(X_test, batch_size=batch_size) assert preds.shape == (num_test, ) if __name__ == '__main__': pytest.main([__file__]) 
# Usage of sklearn's grid_search # from sklearn import grid_search # parameters = dict(hidden_dims = [20, 30], batch_size=[64, 128], # epochs=[2], verbose=[0]) # classifier = Inherit_class_build_fn_clf() # clf = grid_search.GridSearchCV(classifier, parameters) # clf.fit(X_train, y_train) # parameters = dict(hidden_dims = [20, 30], batch_size=[64, 128], # epochs=[2], verbose=[0]) # regressor = Inherit_class_build_fn_reg() # reg = grid_search.GridSearchCV(regressor, parameters, # scoring='mean_squared_error', # n_jobs=1, cv=2, verbose=2) # reg.fit(X_train_reg, y_train_reg) Keras-2.2.4/tests/keras/test_callbacks.py0000644000000000116100000010437313354530144020132 0ustar rooteng00000000000000import os import multiprocessing import numpy as np import pytest from numpy.testing import assert_allclose from csv import reader from csv import Sniffer import shutil from keras import optimizers from keras import initializers from keras import callbacks from keras.models import Sequential, Model from keras.layers import Input, Dense, Dropout, add, dot, Lambda, Layer from keras.layers.convolutional import Conv2D from keras.layers.pooling import MaxPooling2D from keras.layers.pooling import GlobalAveragePooling1D from keras.layers.pooling import GlobalAveragePooling2D from keras.utils.test_utils import get_test_data from keras.utils.generic_utils import to_list from keras.utils.generic_utils import unpack_singleton from keras import backend as K from keras.utils import np_utils try: from unittest.mock import patch except: from mock import patch input_dim = 2 num_hidden = 4 num_classes = 2 batch_size = 5 train_samples = 20 test_samples = 20 def data_generator(x, y, batch_size): x = to_list(x) y = to_list(y) max_batch_index = len(x[0]) // batch_size i = 0 while 1: x_batch = [array[i * batch_size: (i + 1) * batch_size] for array in x] x_batch = unpack_singleton(x_batch) y_batch = [array[i * batch_size: (i + 1) * batch_size] for array in y] y_batch = unpack_singleton(y_batch) yield 
x_batch, y_batch i += 1 i = i % max_batch_index # Changing the default arguments of get_test_data. def get_data_callbacks(num_train=train_samples, num_test=test_samples, input_shape=(input_dim,), classification=True, num_classes=num_classes): return get_test_data(num_train=num_train, num_test=num_test, input_shape=input_shape, classification=classification, num_classes=num_classes) def test_TerminateOnNaN(): np.random.seed(1337) (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) cbks = [callbacks.TerminateOnNaN()] model = Sequential() initializer = initializers.Constant(value=1e5) for _ in range(5): model.add(Dense(num_hidden, input_dim=input_dim, activation='relu', kernel_initializer=initializer)) model.add(Dense(num_classes, activation='linear')) model.compile(loss='mean_squared_error', optimizer='rmsprop') # case 1 fit history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=20) loss = history.history['loss'] assert len(loss) == 1 assert loss[0] == np.inf history = model.fit_generator(data_generator(X_train, y_train, batch_size), len(X_train), validation_data=(X_test, y_test), callbacks=cbks, epochs=20) loss = history.history['loss'] assert len(loss) == 1 assert loss[0] == np.inf or np.isnan(loss[0]) def test_stop_training_csv(tmpdir): np.random.seed(1337) fp = str(tmpdir / 'test.csv') (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) cbks = [callbacks.TerminateOnNaN(), callbacks.CSVLogger(fp)] model = Sequential() for _ in range(5): model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='linear')) model.compile(loss='mean_squared_error', optimizer='rmsprop') def data_generator(): i = 0 max_batch_index = len(X_train) // batch_size tot = 0 while 1: if tot > 3 * 
len(X_train): yield (np.ones([batch_size, input_dim]) * np.nan, np.ones([batch_size, num_classes]) * np.nan) else: yield (X_train[i * batch_size: (i + 1) * batch_size], y_train[i * batch_size: (i + 1) * batch_size]) i += 1 tot += 1 i = i % max_batch_index history = model.fit_generator(data_generator(), len(X_train) // batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=20) loss = history.history['loss'] assert len(loss) > 1 assert loss[-1] == np.inf or np.isnan(loss[-1]) values = [] with open(fp) as f: for x in reader(f): values.append(x) assert 'nan' in values[-1], 'The last epoch was not logged.' os.remove(fp) def test_ModelCheckpoint(tmpdir): np.random.seed(1337) filepath = str(tmpdir / 'checkpoint.h5') (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) # case 1 monitor = 'val_loss' save_best_only = False mode = 'auto' model = Sequential() model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, save_best_only=save_best_only, mode=mode)] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=1) assert os.path.isfile(filepath) os.remove(filepath) # case 2 mode = 'min' cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, save_best_only=save_best_only, mode=mode)] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=1) assert os.path.isfile(filepath) os.remove(filepath) # case 3 mode = 'max' monitor = 'val_acc' cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, save_best_only=save_best_only, mode=mode)] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=1) assert 
os.path.isfile(filepath) os.remove(filepath) # case 4 save_best_only = True cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, save_best_only=save_best_only, mode=mode)] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=1) assert os.path.isfile(filepath) os.remove(filepath) # case 5 save_best_only = False period = 2 mode = 'auto' filepath = 'checkpoint.{epoch:02d}.h5' cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, save_best_only=save_best_only, mode=mode, period=period)] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=4) assert os.path.isfile(filepath.format(epoch=2)) assert os.path.isfile(filepath.format(epoch=4)) assert not os.path.exists(filepath.format(epoch=1)) assert not os.path.exists(filepath.format(epoch=3)) os.remove(filepath.format(epoch=2)) os.remove(filepath.format(epoch=4)) assert not tmpdir.listdir() def test_EarlyStopping(): np.random.seed(1337) (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) model = Sequential() model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) mode = 'max' monitor = 'val_acc' patience = 0 cbks = [callbacks.EarlyStopping(patience=patience, monitor=monitor, mode=mode)] history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=20) mode = 'auto' monitor = 'val_acc' patience = 2 cbks = [callbacks.EarlyStopping(patience=patience, monitor=monitor, mode=mode)] history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=20) def test_EarlyStopping_reuse(): np.random.seed(1337) patience = 3 data = np.random.random((100, 1)) labels = 
np.where(data > 0.5, 1, 0) model = Sequential(( Dense(1, input_dim=1, activation='relu'), Dense(1, activation='sigmoid'), )) model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy']) stopper = callbacks.EarlyStopping(monitor='acc', patience=patience) weights = model.get_weights() hist = model.fit(data, labels, callbacks=[stopper], epochs=20) assert len(hist.epoch) >= patience # This should allow training to go for at least `patience` epochs model.set_weights(weights) hist = model.fit(data, labels, callbacks=[stopper], epochs=20) assert len(hist.epoch) >= patience def test_EarlyStopping_patience(): class DummyModel(object): def __init__(self): self.stop_training = False def get_weights(self): return [] def set_weights(self, weights): pass early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=2) early_stop.model = DummyModel() losses = [0.0860, 0.1096, 0.1040, 0.1019] # Should stop after epoch 3, # as the loss has not improved after patience=2 epochs. epochs_trained = 0 early_stop.on_train_begin() for epoch in range(len(losses)): epochs_trained += 1 early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) if early_stop.model.stop_training: break assert epochs_trained == 3 def test_EarlyStopping_baseline(): class DummyModel(object): def __init__(self): self.stop_training = False def get_weights(self): return [] def set_weights(self, weights): pass def baseline_tester(acc_levels): early_stop = callbacks.EarlyStopping(monitor='val_acc', baseline=0.75, patience=2) early_stop.model = DummyModel() epochs_trained = 0 early_stop.on_train_begin() for epoch in range(len(acc_levels)): epochs_trained += 1 early_stop.on_epoch_end(epoch, logs={'val_acc': acc_levels[epoch]}) if early_stop.model.stop_training: break return epochs_trained acc_levels = [0.55, 0.76, 0.81, 0.81] baseline_met = baseline_tester(acc_levels) acc_levels = [0.55, 0.74, 0.81, 0.81] baseline_not_met = baseline_tester(acc_levels) # All epochs should run because baseline 
was met in second epoch assert baseline_met == 4 # Baseline was not met by second epoch and should stop assert baseline_not_met == 2 def test_EarlyStopping_final_weights(): class DummyModel(object): def __init__(self): self.stop_training = False self.weights = -1 def get_weights(self): return self.weights def set_weights(self, weights): self.weights = weights def set_weight_to_epoch(self, epoch): self.weights = epoch early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=2) early_stop.model = DummyModel() losses = [0.2, 0.15, 0.1, 0.11, 0.12] epochs_trained = 0 early_stop.on_train_begin() for epoch in range(len(losses)): epochs_trained += 1 early_stop.model.set_weight_to_epoch(epoch=epoch) early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) if early_stop.model.stop_training: break # The best configuration is in the epoch 2 (loss = 0.1000), # so with patience=2 we need to end up at epoch 4 assert early_stop.model.get_weights() == 4 def test_EarlyStopping_final_weights_when_restoring_model_weights(): class DummyModel(object): def __init__(self): self.stop_training = False self.weights = -1 def get_weights(self): return self.weights def set_weights(self, weights): self.weights = weights def set_weight_to_epoch(self, epoch): self.weights = epoch early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True) early_stop.model = DummyModel() losses = [0.2, 0.15, 0.1, 0.11, 0.12] # The best configuration is in the epoch 2 (loss = 0.1000). epochs_trained = 0 early_stop.on_train_begin() for epoch in range(len(losses)): epochs_trained += 1 early_stop.model.set_weight_to_epoch(epoch=epoch) early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) if early_stop.model.stop_training: break # The best configuration is in epoch 2 (loss = 0.1000), # and while patience = 2, we're restoring the best weights, # so we end up at the epoch with the best weights, i.e. 
epoch 2 assert early_stop.model.get_weights() == 2 def test_LearningRateScheduler(): np.random.seed(1337) (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) model = Sequential() model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) cbks = [callbacks.LearningRateScheduler(lambda x: 1. / (1. + x))] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=5) assert (float(K.get_value(model.optimizer.lr)) - 0.2) < K.epsilon() def test_ReduceLROnPlateau(): np.random.seed(1337) (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) def make_model(): np.random.seed(1337) model = Sequential() model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer=optimizers.SGD(lr=0.1), metrics=['accuracy']) return model model = make_model() # This should reduce the LR after the first epoch (due to high epsilon). 
cbks = [callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, min_delta=10, patience=1, cooldown=5)] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=5, verbose=2) assert_allclose(float(K.get_value(model.optimizer.lr)), 0.01, atol=K.epsilon()) model = make_model() cbks = [callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, min_delta=0, patience=1, cooldown=5)] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=5, verbose=2) assert_allclose(float(K.get_value(model.optimizer.lr)), 0.1, atol=K.epsilon()) def test_ReduceLROnPlateau_patience(): class DummyOptimizer(object): def __init__(self): self.lr = K.variable(1.0) class DummyModel(object): def __init__(self): self.optimizer = DummyOptimizer() reduce_on_plateau = callbacks.ReduceLROnPlateau(monitor='val_loss', patience=2) reduce_on_plateau.model = DummyModel() losses = [0.0860, 0.1096, 0.1040] lrs = [] for epoch in range(len(losses)): reduce_on_plateau.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) lrs.append(K.get_value(reduce_on_plateau.model.optimizer.lr)) # The learning rates should be 1.0 except the last one assert all([lr == 1.0 for lr in lrs[:-1]]) and lrs[-1] < 1.0 def test_ReduceLROnPlateau_backwards_compatibility(): import warnings with warnings.catch_warnings(record=True) as ws: reduce_on_plateau = callbacks.ReduceLROnPlateau(epsilon=1e-13) # Check if warnings are disabled if os.environ.get("PYTHONWARNINGS") != "ignore": assert "`epsilon` argument is deprecated" in str(ws[0].message) assert not hasattr(reduce_on_plateau, 'epsilon') assert hasattr(reduce_on_plateau, 'min_delta') assert reduce_on_plateau.min_delta == 1e-13 def test_CSVLogger(tmpdir): np.random.seed(1337) filepath = str(tmpdir / 'log.tsv') sep = '\t' (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) def 
make_model(): np.random.seed(1337) model = Sequential() model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer=optimizers.SGD(lr=0.1), metrics=['accuracy']) return model # case 1, create new file with defined separator model = make_model() cbks = [callbacks.CSVLogger(filepath, separator=sep)] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=1) assert os.path.isfile(filepath) with open(filepath) as csvfile: dialect = Sniffer().sniff(csvfile.read()) assert dialect.delimiter == sep del model del cbks # case 2, append data to existing file, skip header model = make_model() cbks = [callbacks.CSVLogger(filepath, separator=sep, append=True)] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=1) # case 3, reuse of CSVLogger object model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=2) import re with open(filepath) as csvfile: list_lines = csvfile.readlines() for line in list_lines: assert line.count(sep) == 4 assert len(list_lines) == 5 output = " ".join(list_lines) assert len(re.findall('epoch', output)) == 1 os.remove(filepath) assert not tmpdir.listdir() @pytest.mark.parametrize('update_freq', ['batch', 'epoch', 9]) def test_TensorBoard(tmpdir, update_freq): np.random.seed(np.random.randint(1, 1e7)) filepath = str(tmpdir / 'logs') (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) class DummyStatefulMetric(Layer): def __init__(self, name='dummy_stateful_metric', **kwargs): super(DummyStatefulMetric, self).__init__(name=name, **kwargs) self.stateful = True self.state = K.variable(value=0, dtype='int32') def reset_states(self): pass def __call__(self, y_true, y_pred): return self.state inp = 
Input((input_dim,)) hidden = Dense(num_hidden, activation='relu')(inp) hidden = Dropout(0.1)(hidden) output = Dense(num_classes, activation='softmax')(hidden) model = Model(inputs=inp, outputs=output) model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy', DummyStatefulMetric()]) # we must generate new callbacks for each test, as they aren't stateless def callbacks_factory(histogram_freq, embeddings_freq=1): return [callbacks.TensorBoard(log_dir=filepath, histogram_freq=histogram_freq, write_images=True, write_grads=True, embeddings_freq=embeddings_freq, embeddings_layer_names=['dense_1'], embeddings_data=X_test, batch_size=5, update_freq=update_freq)] # fit without validation data model.fit(X_train, y_train, batch_size=batch_size, callbacks=callbacks_factory(histogram_freq=0, embeddings_freq=0), epochs=3) # fit with validation data and accuracy model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=callbacks_factory(histogram_freq=0), epochs=2) # fit generator without validation data train_generator = data_generator(X_train, y_train, batch_size) model.fit_generator(train_generator, len(X_train), epochs=2, callbacks=callbacks_factory(histogram_freq=0, embeddings_freq=0)) # fit generator with validation data and accuracy train_generator = data_generator(X_train, y_train, batch_size) model.fit_generator(train_generator, len(X_train), epochs=2, validation_data=(X_test, y_test), callbacks=callbacks_factory(histogram_freq=1)) assert os.path.isdir(filepath) shutil.rmtree(filepath) assert not tmpdir.listdir() @pytest.mark.skipif((K.backend() != 'tensorflow'), reason='Requires TensorFlow backend') def test_TensorBoard_histogram_freq_must_have_validation_data(tmpdir): np.random.seed(np.random.randint(1, 1e7)) filepath = str(tmpdir / 'logs') (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) inp = 
Input((input_dim,)) hidden = Dense(num_hidden, activation='relu')(inp) hidden = Dropout(0.1)(hidden) output = Dense(num_classes, activation='softmax')(hidden) model = Model(inputs=inp, outputs=output) model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) # we must generate new callbacks for each test, as they aren't stateless def callbacks_factory(histogram_freq, embeddings_freq=1): return [callbacks.TensorBoard(log_dir=filepath, histogram_freq=histogram_freq, write_images=True, write_grads=True, embeddings_freq=embeddings_freq, embeddings_layer_names=['dense_1'], embeddings_data=X_test, batch_size=5)] # fit without validation data should raise ValueError if histogram_freq > 0 with pytest.raises(ValueError) as raised_exception: model.fit(X_train, y_train, batch_size=batch_size, callbacks=callbacks_factory(histogram_freq=1), epochs=3) assert 'validation_data must be provided' in str(raised_exception.value) train_generator = data_generator(X_train, y_train, batch_size) validation_generator = data_generator(X_test, y_test, batch_size) # fit generator without validation data should raise ValueError if # histogram_freq > 0 with pytest.raises(ValueError) as raised_exception: model.fit_generator(train_generator, len(X_train), epochs=2, callbacks=callbacks_factory(histogram_freq=1)) assert 'validation_data must be provided' in str(raised_exception.value) # fit generator with validation data generator should raise ValueError if # histogram_freq > 0 with pytest.raises(ValueError) as raised_exception: model.fit_generator(train_generator, len(X_train), epochs=2, validation_data=validation_generator, validation_steps=1, callbacks=callbacks_factory(histogram_freq=1)) assert 'validation_data must be provided' in str(raised_exception.value) def test_TensorBoard_multi_input_output(tmpdir): np.random.seed(np.random.randint(1, 1e7)) filepath = str(tmpdir / 'logs') (X_train, y_train), (X_test, y_test) = get_data_callbacks( input_shape=(input_dim, 
input_dim)) y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) inp1 = Input((input_dim, input_dim)) inp2 = Input((input_dim, input_dim)) inp_3d = add([inp1, inp2]) inp_2d = GlobalAveragePooling1D()(inp_3d) # test a layer with a list of output tensors inp_pair = Lambda(lambda x: x)([inp_3d, inp_2d]) hidden = dot(inp_pair, axes=-1) hidden = Dense(num_hidden, activation='relu')(hidden) hidden = Dropout(0.1)(hidden) output1 = Dense(num_classes, activation='softmax')(hidden) output2 = Dense(num_classes, activation='softmax')(hidden) model = Model(inputs=[inp1, inp2], outputs=[output1, output2]) model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) # we must generate new callbacks for each test, as they aren't stateless def callbacks_factory(histogram_freq, embeddings_freq=1): return [callbacks.TensorBoard(log_dir=filepath, histogram_freq=histogram_freq, write_images=True, write_grads=True, embeddings_freq=embeddings_freq, embeddings_layer_names=['dense_1'], embeddings_data=[X_test] * 2, batch_size=5)] # fit without validation data model.fit([X_train] * 2, [y_train] * 2, batch_size=batch_size, callbacks=callbacks_factory(histogram_freq=0, embeddings_freq=0), epochs=3) # fit with validation data and accuracy model.fit([X_train] * 2, [y_train] * 2, batch_size=batch_size, validation_data=([X_test] * 2, [y_test] * 2), callbacks=callbacks_factory(histogram_freq=1), epochs=2) train_generator = data_generator([X_train] * 2, [y_train] * 2, batch_size) # fit generator without validation data model.fit_generator(train_generator, len(X_train), epochs=2, callbacks=callbacks_factory(histogram_freq=0, embeddings_freq=0)) # fit generator with validation data and accuracy model.fit_generator(train_generator, len(X_train), epochs=2, validation_data=([X_test] * 2, [y_test] * 2), callbacks=callbacks_factory(histogram_freq=1)) assert os.path.isdir(filepath) shutil.rmtree(filepath) assert not tmpdir.listdir() def 
test_TensorBoard_convnet(tmpdir): np.random.seed(np.random.randint(1, 1e7)) filepath = str(tmpdir / 'logs') input_shape = (16, 16, 3) (x_train, y_train), (x_test, y_test) = get_data_callbacks( num_train=500, num_test=200, input_shape=input_shape) y_train = np_utils.to_categorical(y_train) y_test = np_utils.to_categorical(y_test) model = Sequential([ Conv2D(filters=8, kernel_size=3, activation='relu', input_shape=input_shape), MaxPooling2D(pool_size=2), Conv2D(filters=4, kernel_size=(3, 3), activation='relu', padding='same'), GlobalAveragePooling2D(), Dense(num_classes, activation='softmax') ]) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1, write_images=True, write_grads=True, batch_size=16) cbks = [tsb] model.summary() history = model.fit(x_train, y_train, epochs=2, batch_size=16, validation_data=(x_test, y_test), callbacks=cbks, verbose=0) assert os.path.isdir(filepath) shutil.rmtree(filepath) assert not tmpdir.listdir() def test_TensorBoard_display_float_from_logs(tmpdir): filepath = str(tmpdir / 'logs') input_shape = (3,) (x_train, y_train), _ = get_data_callbacks(num_train=10, num_test=0, input_shape=input_shape) y_train = np_utils.to_categorical(y_train) model = Sequential([ Dense(num_classes, activation='softmax') ]) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') class CustomCallback(callbacks.Callback): def on_epoch_end(self, epoch, logs=None): logs['test'] = 0. 
tsb = callbacks.TensorBoard(log_dir=filepath, batch_size=16) cbks = [CustomCallback(), tsb] model.fit(x_train, y_train, epochs=2, batch_size=16, callbacks=cbks, verbose=0) assert os.path.isdir(filepath) shutil.rmtree(filepath) assert not tmpdir.listdir() def test_CallbackValData(): np.random.seed(1337) (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) model = Sequential() model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) cbk = callbacks.LambdaCallback(on_train_end=lambda x: 1) model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=[cbk], epochs=1) cbk2 = callbacks.LambdaCallback(on_train_end=lambda x: 1) train_generator = data_generator(X_train, y_train, batch_size) model.fit_generator(train_generator, len(X_train), epochs=1, validation_data=(X_test, y_test), callbacks=[cbk2]) # callback validation data should always have x, y, and sample weights assert len(cbk.validation_data) == len(cbk2.validation_data) == 3 assert cbk.validation_data[0] is cbk2.validation_data[0] assert cbk.validation_data[1] is cbk2.validation_data[1] assert cbk.validation_data[2].shape == cbk2.validation_data[2].shape def test_LambdaCallback(): np.random.seed(1337) (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) model = Sequential() model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) # Start an arbitrary process that should run during model training and # be terminated after training has completed. 
def f(): while True: pass p = multiprocessing.Process(target=f) p.start() cleanup_callback = callbacks.LambdaCallback( on_train_end=lambda logs: p.terminate()) cbks = [cleanup_callback] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=5) p.join() assert not p.is_alive() def test_TensorBoard_with_ReduceLROnPlateau(tmpdir): import shutil np.random.seed(np.random.randint(1, 1e7)) filepath = str(tmpdir / 'logs') (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) model = Sequential() model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='softmax')) model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy']) cbks = [ callbacks.ReduceLROnPlateau( monitor='val_loss', factor=0.5, patience=4, verbose=1), callbacks.TensorBoard( log_dir=filepath)] model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=2) assert os.path.isdir(filepath) shutil.rmtree(filepath) assert not tmpdir.listdir() def tests_RemoteMonitor(): (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) model = Sequential() model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) cbks = [callbacks.RemoteMonitor()] with patch('requests.post'): model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=1) def tests_RemoteMonitorWithJsonPayload(): (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) model = Sequential() model.add(Dense(num_hidden, 
input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) cbks = [callbacks.RemoteMonitor(send_as_json=True)] with patch('requests.post'): model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=1) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/0000755000000000116100000000000013355226624016077 5ustar rooteng00000000000000Keras-2.2.4/tests/keras/layers/advanced_activations_test.py0000644000000000116100000000453213354530144023657 0ustar rooteng00000000000000import pytest from keras.utils.test_utils import layer_test from keras import layers from keras import backend as K def test_leaky_relu(): for alpha in [0., .5, -1.]: layer_test(layers.LeakyReLU, kwargs={'alpha': alpha}, input_shape=(2, 3, 4)) def test_prelu(): layer_test(layers.PReLU, kwargs={}, input_shape=(2, 3, 4)) def test_prelu_share(): layer_test(layers.PReLU, kwargs={'shared_axes': 1}, input_shape=(2, 3, 4)) def test_elu(): for alpha in [0., .5, -1.]: layer_test(layers.ELU, kwargs={'alpha': alpha}, input_shape=(2, 3, 4)) def test_thresholded_relu(): layer_test(layers.ThresholdedReLU, kwargs={'theta': 0.5}, input_shape=(2, 3, 4)) def test_softmax(): for axis in [1, -1]: layer_test(layers.Softmax, kwargs={'axis': axis}, input_shape=(2, 3, 4)) def test_relu(): layer_test(layers.ReLU, kwargs={'max_value': 10, 'negative_slope': 0.2, 'threshold': 3.0}, input_shape=(2, 3, 4)) layer_test(layers.ReLU, kwargs={'max_value': 6}, input_shape=(2, 3, 4)) layer_test(layers.ReLU, kwargs={'negative_slope': 0.2}, input_shape=(2, 3, 4)) # max_value of ReLU layer cannot be negative value with pytest.raises(ValueError): layer_test(layers.ReLU, kwargs={'max_value': -2.0}, input_shape=(2, 3, 4)) # negative_slope of ReLU layer cannot be negative value with pytest.raises(ValueError): layer_test(layers.ReLU, 
kwargs={'negative_slope': -2.0}, input_shape=(2, 3, 4)) @pytest.mark.skipif((K.backend() != 'tensorflow'), reason='TF-specific implementation.') def test_relu_tf_ops(): inputs = layers.Input((3,)) # Test that `relu` op gets used. outputs = layers.ReLU()(inputs) assert outputs.op.name.lower().endswith('/relu') # Test that `leakyrelu` op gets used. outputs = layers.ReLU(negative_slope=0.2)(inputs) assert outputs.op.name.lower().endswith('/leakyrelu') # Test that `relu6` op gets used. outputs = layers.ReLU(max_value=6)(inputs) assert outputs.op.name.lower().endswith('/relu6') if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/recurrent_test.py0000644000000000116100000010231313354530144021513 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose import keras from keras.utils.test_utils import layer_test from keras.layers import recurrent from keras.layers import embeddings from keras.models import Sequential from keras.models import Model from keras.engine import Input from keras.layers import Masking from keras import regularizers from keras import backend as K num_samples, timesteps, embedding_dim, units = 2, 5, 4, 3 embedding_num = 12 rnn_test = pytest.mark.parametrize('layer_class', [recurrent.SimpleRNN, recurrent.GRU, recurrent.LSTM]) rnn_cell_test = pytest.mark.parametrize('cell_class', [recurrent.SimpleRNNCell, recurrent.GRUCell, recurrent.LSTMCell]) @rnn_test def test_return_sequences(layer_class): layer_test(layer_class, kwargs={'units': units, 'return_sequences': True}, input_shape=(num_samples, timesteps, embedding_dim)) @rnn_test def test_dynamic_behavior(layer_class): layer = layer_class(units, input_shape=(None, embedding_dim)) model = Sequential() model.add(layer) model.compile('sgd', 'mse') x = np.random.random((num_samples, timesteps, embedding_dim)) y = np.random.random((num_samples, units)) model.train_on_batch(x, y) @rnn_test def test_stateful_invalid_use(layer_class): 
layer = layer_class(units, stateful=True, batch_input_shape=(num_samples, timesteps, embedding_dim)) model = Sequential() model.add(layer) model.compile('sgd', 'mse') x = np.random.random((num_samples * 2, timesteps, embedding_dim)) y = np.random.random((num_samples * 2, units)) with pytest.raises(ValueError): model.fit(x, y) with pytest.raises(ValueError): model.predict(x, batch_size=num_samples + 1) @rnn_test @pytest.mark.skipif((K.backend() in ['theano']), reason='Not supported.') def test_dropout(layer_class): for unroll in [True, False]: layer_test(layer_class, kwargs={'units': units, 'dropout': 0.1, 'recurrent_dropout': 0.1, 'unroll': unroll}, input_shape=(num_samples, timesteps, embedding_dim)) # Test that dropout is applied during training x = K.ones((num_samples, timesteps, embedding_dim)) layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, input_shape=(timesteps, embedding_dim)) y = layer(x) assert y._uses_learning_phase y = layer(x, training=True) assert not getattr(y, '_uses_learning_phase') # Test that dropout is not applied during testing x = np.random.random((num_samples, timesteps, embedding_dim)) layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, unroll=unroll, input_shape=(timesteps, embedding_dim)) model = Sequential([layer]) assert model.uses_learning_phase y1 = model.predict(x) y2 = model.predict(x) assert_allclose(y1, y2) @rnn_test def test_statefulness(layer_class): model = Sequential() model.add(embeddings.Embedding(embedding_num, embedding_dim, mask_zero=True, input_length=timesteps, batch_input_shape=(num_samples, timesteps))) layer = layer_class(units, return_sequences=False, stateful=True, weights=None) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones((num_samples, timesteps))) assert(out1.shape == (num_samples, units)) # train once so that the states change model.train_on_batch(np.ones((num_samples, timesteps)), np.ones((num_samples, units))) out2 = 
model.predict(np.ones((num_samples, timesteps))) # if the state is not reset, output should be different assert(out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones((num_samples, timesteps))) assert(out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones((num_samples, timesteps))) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones((num_samples, timesteps))) assert(out4.max() != out5.max()) @rnn_test def test_masking_correctness(layer_class): # Check masking: output with left padding and right padding # should be the same. model = Sequential() model.add(embeddings.Embedding(embedding_num, embedding_dim, mask_zero=True, input_length=timesteps, batch_input_shape=(num_samples, timesteps))) layer = layer_class(units, return_sequences=False) model.add(layer) model.compile(optimizer='sgd', loss='mse') left_padded_input = np.ones((num_samples, timesteps)) left_padded_input[0, :1] = 0 left_padded_input[1, :2] = 0 out6 = model.predict(left_padded_input) right_padded_input = np.ones((num_samples, timesteps)) right_padded_input[0, -1:] = 0 right_padded_input[1, -2:] = 0 out7 = model.predict(right_padded_input) assert_allclose(out7, out6, atol=1e-5) @rnn_test def test_implementation_mode(layer_class): for mode in [1, 2]: # Without dropout layer_test(layer_class, kwargs={'units': units, 'implementation': mode}, input_shape=(num_samples, timesteps, embedding_dim)) # With dropout layer_test(layer_class, kwargs={'units': units, 'implementation': mode, 'dropout': 0.1, 'recurrent_dropout': 0.1}, input_shape=(num_samples, timesteps, embedding_dim)) # Without bias layer_test(layer_class, kwargs={'units': units, 'implementation': mode, 'use_bias': False}, input_shape=(num_samples, timesteps, embedding_dim)) @rnn_test def 
test_regularizer(layer_class): layer = layer_class(units, return_sequences=False, weights=None, input_shape=(timesteps, embedding_dim), kernel_regularizer=regularizers.l1(0.01), recurrent_regularizer=regularizers.l1(0.01), bias_regularizer='l2') layer.build((None, None, embedding_dim)) assert len(layer.losses) == 3 assert len(layer.cell.losses) == 3 layer = layer_class(units, return_sequences=False, weights=None, input_shape=(timesteps, embedding_dim), activity_regularizer='l2') assert layer.activity_regularizer x = K.variable(np.ones((num_samples, timesteps, embedding_dim))) layer(x) assert len(layer.cell.get_losses_for(x)) == 0 assert len(layer.get_losses_for(x)) == 1 @rnn_test def test_trainability(layer_class): layer = layer_class(units) layer.build((None, None, embedding_dim)) assert len(layer.weights) == 3 assert len(layer.trainable_weights) == 3 assert len(layer.non_trainable_weights) == 0 layer.trainable = False assert len(layer.weights) == 3 assert len(layer.trainable_weights) == 0 assert len(layer.non_trainable_weights) == 3 layer.trainable = True assert len(layer.weights) == 3 assert len(layer.trainable_weights) == 3 assert len(layer.non_trainable_weights) == 0 def test_masking_layer(): ''' This test based on a previously failing issue here: https://github.com/keras-team/keras/issues/1567 ''' inputs = np.random.random((6, 3, 4)) targets = np.abs(np.random.random((6, 3, 5))) targets /= targets.sum(axis=-1, keepdims=True) model = Sequential() model.add(Masking(input_shape=(3, 4))) model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=False)) model.compile(loss='categorical_crossentropy', optimizer='adam') model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) model = Sequential() model.add(Masking(input_shape=(3, 4))) model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=True)) model.compile(loss='categorical_crossentropy', optimizer='adam') model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) @rnn_test def 
test_from_config(layer_class): stateful_flags = (False, True) for stateful in stateful_flags: l1 = layer_class(units=1, stateful=stateful) l2 = layer_class.from_config(l1.get_config()) assert l1.get_config() == l2.get_config() @rnn_test def test_specify_initial_state_keras_tensor(layer_class): num_states = 2 if layer_class is recurrent.LSTM else 1 # Test with Keras tensor inputs = Input((timesteps, embedding_dim)) initial_state = [Input((units,)) for _ in range(num_states)] layer = layer_class(units) if len(initial_state) == 1: output = layer(inputs, initial_state=initial_state[0]) else: output = layer(inputs, initial_state=initial_state) assert initial_state[0] in layer._inbound_nodes[0].input_tensors model = Model([inputs] + initial_state, output) model.compile(loss='categorical_crossentropy', optimizer='adam') inputs = np.random.random((num_samples, timesteps, embedding_dim)) initial_state = [np.random.random((num_samples, units)) for _ in range(num_states)] targets = np.random.random((num_samples, units)) model.fit([inputs] + initial_state, targets) @rnn_test def test_specify_initial_state_non_keras_tensor(layer_class): num_states = 2 if layer_class is recurrent.LSTM else 1 # Test with non-Keras tensor inputs = Input((timesteps, embedding_dim)) initial_state = [K.random_normal_variable((num_samples, units), 0, 1) for _ in range(num_states)] layer = layer_class(units) output = layer(inputs, initial_state=initial_state) model = Model(inputs, output) model.compile(loss='categorical_crossentropy', optimizer='adam') inputs = np.random.random((num_samples, timesteps, embedding_dim)) targets = np.random.random((num_samples, units)) model.fit(inputs, targets) @rnn_test def test_reset_states_with_values(layer_class): num_states = 2 if layer_class is recurrent.LSTM else 1 layer = layer_class(units, stateful=True) layer.build((num_samples, timesteps, embedding_dim)) layer.reset_states() assert len(layer.states) == num_states assert layer.states[0] is not None 
np.testing.assert_allclose(K.eval(layer.states[0]), np.zeros(K.int_shape(layer.states[0])), atol=1e-4) state_shapes = [K.int_shape(state) for state in layer.states] values = [np.ones(shape) for shape in state_shapes] if len(values) == 1: values = values[0] layer.reset_states(values) np.testing.assert_allclose(K.eval(layer.states[0]), np.ones(K.int_shape(layer.states[0])), atol=1e-4) # Test fit with invalid data with pytest.raises(ValueError): layer.reset_states([1] * (len(layer.states) + 1)) @rnn_test def test_initial_states_as_other_inputs(layer_class): num_states = 2 if layer_class is recurrent.LSTM else 1 # Test with Keras tensor main_inputs = Input((timesteps, embedding_dim)) initial_state = [Input((units,)) for _ in range(num_states)] inputs = [main_inputs] + initial_state layer = layer_class(units) output = layer(inputs) assert initial_state[0] in layer._inbound_nodes[0].input_tensors model = Model(inputs, output) model.compile(loss='categorical_crossentropy', optimizer='adam') main_inputs = np.random.random((num_samples, timesteps, embedding_dim)) initial_state = [np.random.random((num_samples, units)) for _ in range(num_states)] targets = np.random.random((num_samples, units)) model.train_on_batch([main_inputs] + initial_state, targets) @rnn_test def test_specify_state_with_masking(layer_class): ''' This test based on a previously failing issue here: https://github.com/keras-team/keras/issues/1567 ''' num_states = 2 if layer_class is recurrent.LSTM else 1 inputs = Input((timesteps, embedding_dim)) _ = Masking()(inputs) initial_state = [Input((units,)) for _ in range(num_states)] output = layer_class(units)(inputs, initial_state=initial_state) model = Model([inputs] + initial_state, output) model.compile(loss='categorical_crossentropy', optimizer='adam') inputs = np.random.random((num_samples, timesteps, embedding_dim)) initial_state = [np.random.random((num_samples, units)) for _ in range(num_states)] targets = np.random.random((num_samples, units)) 
model.fit([inputs] + initial_state, targets) @rnn_test def test_return_state(layer_class): num_states = 2 if layer_class is recurrent.LSTM else 1 inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) layer = layer_class(units, return_state=True, stateful=True) outputs = layer(inputs) output, state = outputs[0], outputs[1:] assert len(state) == num_states model = Model(inputs, state[0]) inputs = np.random.random((num_samples, timesteps, embedding_dim)) state = model.predict(inputs) np.testing.assert_allclose(K.eval(layer.states[0]), state, atol=1e-4) @rnn_test def test_state_reuse(layer_class): inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) layer = layer_class(units, return_state=True, return_sequences=True) outputs = layer(inputs) output, state = outputs[0], outputs[1:] output = layer_class(units)(output, initial_state=state) model = Model(inputs, output) inputs = np.random.random((num_samples, timesteps, embedding_dim)) outputs = model.predict(inputs) @rnn_test @pytest.mark.skipif((K.backend() in ['theano']), reason='Not supported.') def test_state_reuse_with_dropout(layer_class): input1 = Input(batch_shape=(num_samples, timesteps, embedding_dim)) layer = layer_class(units, return_state=True, return_sequences=True, dropout=0.2) state = layer(input1)[1:] input2 = Input(batch_shape=(num_samples, timesteps, embedding_dim)) output = layer_class(units)(input2, initial_state=state) model = Model([input1, input2], output) inputs = [np.random.random((num_samples, timesteps, embedding_dim)), np.random.random((num_samples, timesteps, embedding_dim))] outputs = model.predict(inputs) def test_minimal_rnn_cell_non_layer(): class MinimalRNNCell(object): def __init__(self, units, input_dim): self.units = units self.state_size = units self.kernel = keras.backend.variable( np.random.random((input_dim, units))) def call(self, inputs, states): prev_output = states[0] output = keras.backend.dot(inputs, self.kernel) + prev_output return output, 
[output] # Basic test case. cell = MinimalRNNCell(32, 5) x = keras.Input((None, 5)) layer = recurrent.RNN(cell) y = layer(x) model = keras.models.Model(x, y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) # Test stacking. cells = [MinimalRNNCell(8, 5), MinimalRNNCell(32, 8), MinimalRNNCell(32, 32)] layer = recurrent.RNN(cells) y = layer(x) model = keras.models.Model(x, y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) def test_minimal_rnn_cell_non_layer_multiple_states(): class MinimalRNNCell(object): def __init__(self, units, input_dim): self.units = units self.state_size = (units, units) self.kernel = keras.backend.variable( np.random.random((input_dim, units))) def call(self, inputs, states): prev_output_1 = states[0] prev_output_2 = states[1] output = keras.backend.dot(inputs, self.kernel) output += prev_output_1 output -= prev_output_2 return output, [output * 2, output * 3] # Basic test case. cell = MinimalRNNCell(32, 5) x = keras.Input((None, 5)) layer = recurrent.RNN(cell) y = layer(x) model = keras.models.Model(x, y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) # Test stacking. 
cells = [MinimalRNNCell(8, 5), MinimalRNNCell(16, 8), MinimalRNNCell(32, 16)] layer = recurrent.RNN(cells) assert layer.cell.state_size == (8, 8, 16, 16, 32, 32) y = layer(x) model = keras.models.Model(x, y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) def test_minimal_rnn_cell_layer(): class MinimalRNNCell(keras.layers.Layer): def __init__(self, units, **kwargs): self.units = units self.state_size = units super(MinimalRNNCell, self).__init__(**kwargs) def build(self, input_shape): # no time axis in the input shape passed to RNN cells assert len(input_shape) == 2 self.kernel = self.add_weight(shape=(input_shape[-1], self.units), initializer='uniform', name='kernel') self.recurrent_kernel = self.add_weight( shape=(self.units, self.units), initializer='uniform', name='recurrent_kernel') self.built = True def call(self, inputs, states): prev_output = states[0] h = keras.backend.dot(inputs, self.kernel) output = h + keras.backend.dot(prev_output, self.recurrent_kernel) return output, [output] def get_config(self): config = {'units': self.units} base_config = super(MinimalRNNCell, self).get_config() return dict(list(base_config.items()) + list(config.items())) # Test basic case. x = keras.Input((None, 5)) cell = MinimalRNNCell(32) layer = recurrent.RNN(cell) y = layer(x) model = keras.models.Model(x, y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) # Test basic case serialization. x_np = np.random.random((6, 5, 5)) y_np = model.predict(x_np) weights = model.get_weights() config = layer.get_config() with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): layer = recurrent.RNN.from_config(config) y = layer(x) model = keras.models.Model(x, y) model.set_weights(weights) y_np_2 = model.predict(x_np) assert_allclose(y_np, y_np_2, atol=1e-4) # Test stacking. 
cells = [MinimalRNNCell(8), MinimalRNNCell(12), MinimalRNNCell(32)] layer = recurrent.RNN(cells) y = layer(x) model = keras.models.Model(x, y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) # Test stacked RNN serialization. x_np = np.random.random((6, 5, 5)) y_np = model.predict(x_np) weights = model.get_weights() config = layer.get_config() with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): layer = recurrent.RNN.from_config(config) y = layer(x) model = keras.models.Model(x, y) model.set_weights(weights) y_np_2 = model.predict(x_np) assert_allclose(y_np, y_np_2, atol=1e-4) @rnn_cell_test def test_builtin_rnn_cell_layer(cell_class): # Test basic case. x = keras.Input((None, 5)) cell = cell_class(32) layer = recurrent.RNN(cell) y = layer(x) model = keras.models.Model(x, y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) # Test basic case serialization. x_np = np.random.random((6, 5, 5)) y_np = model.predict(x_np) weights = model.get_weights() config = layer.get_config() layer = recurrent.RNN.from_config(config) y = layer(x) model = keras.models.Model(x, y) model.set_weights(weights) y_np_2 = model.predict(x_np) assert_allclose(y_np, y_np_2, atol=1e-4) # Test stacking. cells = [cell_class(8), cell_class(12), cell_class(32)] layer = recurrent.RNN(cells) y = layer(x) model = keras.models.Model(x, y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) # Test stacked RNN serialization. 
x_np = np.random.random((6, 5, 5)) y_np = model.predict(x_np) weights = model.get_weights() config = layer.get_config() layer = recurrent.RNN.from_config(config) y = layer(x) model = keras.models.Model(x, y) model.set_weights(weights) y_np_2 = model.predict(x_np) assert_allclose(y_np, y_np_2, atol=1e-4) @pytest.mark.skipif((K.backend() in ['cntk', 'theano']), reason='Not supported.') def test_stacked_rnn_dropout(): cells = [recurrent.LSTMCell(3, dropout=0.1, recurrent_dropout=0.1), recurrent.LSTMCell(3, dropout=0.1, recurrent_dropout=0.1)] layer = recurrent.RNN(cells) x = keras.Input((None, 5)) y = layer(x) model = keras.models.Model(x, y) model.compile('sgd', 'mse') x_np = np.random.random((6, 5, 5)) y_np = np.random.random((6, 3)) model.train_on_batch(x_np, y_np) def test_stacked_rnn_attributes(): cells = [recurrent.LSTMCell(3), recurrent.LSTMCell(3, kernel_regularizer='l2')] layer = recurrent.RNN(cells) layer.build((None, None, 5)) # Test regularization losses assert len(layer.losses) == 1 # Test weights assert len(layer.trainable_weights) == 6 cells[0].trainable = False assert len(layer.trainable_weights) == 3 assert len(layer.non_trainable_weights) == 3 # Test `get_losses_for` x = keras.Input((None, 5)) y = K.sum(x) cells[0].add_loss(y, inputs=x) assert layer.get_losses_for(x) == [y] def test_stacked_rnn_compute_output_shape(): cells = [recurrent.LSTMCell(3), recurrent.LSTMCell(6)] layer = recurrent.RNN(cells, return_state=True, return_sequences=True) output_shape = layer.compute_output_shape((None, timesteps, embedding_dim)) expected_output_shape = [(None, timesteps, 6), (None, 3), (None, 3), (None, 6), (None, 6)] assert output_shape == expected_output_shape # Test reverse_state_order = True for stacked cell. 
stacked_cell = recurrent.StackedRNNCells( cells, reverse_state_order=True) layer = recurrent.RNN( stacked_cell, return_state=True, return_sequences=True) output_shape = layer.compute_output_shape((None, timesteps, embedding_dim)) expected_output_shape = [(None, timesteps, 6), (None, 6), (None, 6), (None, 3), (None, 3)] assert output_shape == expected_output_shape @rnn_test def test_batch_size_equal_one(layer_class): inputs = Input(batch_shape=(1, timesteps, embedding_dim)) layer = layer_class(units) outputs = layer(inputs) model = Model(inputs, outputs) model.compile('sgd', 'mse') x = np.random.random((1, timesteps, embedding_dim)) y = np.random.random((1, units)) model.train_on_batch(x, y) def test_rnn_cell_with_constants_layer(): class RNNCellWithConstants(keras.layers.Layer): def __init__(self, units, **kwargs): self.units = units self.state_size = units super(RNNCellWithConstants, self).__init__(**kwargs) def build(self, input_shape): if not isinstance(input_shape, list): raise TypeError('expects constants shape') [input_shape, constant_shape] = input_shape # will (and should) raise if more than one constant passed self.input_kernel = self.add_weight( shape=(input_shape[-1], self.units), initializer='uniform', name='kernel') self.recurrent_kernel = self.add_weight( shape=(self.units, self.units), initializer='uniform', name='recurrent_kernel') self.constant_kernel = self.add_weight( shape=(constant_shape[-1], self.units), initializer='uniform', name='constant_kernel') self.built = True def call(self, inputs, states, constants): [prev_output] = states [constant] = constants h_input = keras.backend.dot(inputs, self.input_kernel) h_state = keras.backend.dot(prev_output, self.recurrent_kernel) h_const = keras.backend.dot(constant, self.constant_kernel) output = h_input + h_state + h_const return output, [output] def get_config(self): config = {'units': self.units} base_config = super(RNNCellWithConstants, self).get_config() return dict(list(base_config.items()) + 
list(config.items())) # Test basic case. x = keras.Input((None, 5)) c = keras.Input((3,)) cell = RNNCellWithConstants(32) layer = recurrent.RNN(cell) y = layer(x, constants=c) model = keras.models.Model([x, c], y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( [np.zeros((6, 5, 5)), np.zeros((6, 3))], np.zeros((6, 32)) ) # Test basic case serialization. x_np = np.random.random((6, 5, 5)) c_np = np.random.random((6, 3)) y_np = model.predict([x_np, c_np]) weights = model.get_weights() config = layer.get_config() custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} with keras.utils.CustomObjectScope(custom_objects): layer = recurrent.RNN.from_config(config.copy()) y = layer(x, constants=c) model = keras.models.Model([x, c], y) model.set_weights(weights) y_np_2 = model.predict([x_np, c_np]) assert_allclose(y_np, y_np_2, atol=1e-4) # test flat list inputs with keras.utils.CustomObjectScope(custom_objects): layer = recurrent.RNN.from_config(config.copy()) y = layer([x, c]) model = keras.models.Model([x, c], y) model.set_weights(weights) y_np_3 = model.predict([x_np, c_np]) assert_allclose(y_np, y_np_3, atol=1e-4) # Test stacking. cells = [recurrent.GRUCell(8), RNNCellWithConstants(12), RNNCellWithConstants(32)] layer = recurrent.RNN(cells) y = layer(x, constants=c) model = keras.models.Model([x, c], y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( [np.zeros((6, 5, 5)), np.zeros((6, 3))], np.zeros((6, 32)) ) # Test stacked RNN serialization. 
x_np = np.random.random((6, 5, 5)) c_np = np.random.random((6, 3)) y_np = model.predict([x_np, c_np]) weights = model.get_weights() config = layer.get_config() with keras.utils.CustomObjectScope(custom_objects): layer = recurrent.RNN.from_config(config.copy()) y = layer(x, constants=c) model = keras.models.Model([x, c], y) model.set_weights(weights) y_np_2 = model.predict([x_np, c_np]) assert_allclose(y_np, y_np_2, atol=1e-4) def test_rnn_cell_with_constants_layer_passing_initial_state(): class RNNCellWithConstants(keras.layers.Layer): def __init__(self, units, **kwargs): self.units = units self.state_size = units super(RNNCellWithConstants, self).__init__(**kwargs) def build(self, input_shape): if not isinstance(input_shape, list): raise TypeError('expects constants shape') [input_shape, constant_shape] = input_shape # will (and should) raise if more than one constant passed self.input_kernel = self.add_weight( shape=(input_shape[-1], self.units), initializer='uniform', name='kernel') self.recurrent_kernel = self.add_weight( shape=(self.units, self.units), initializer='uniform', name='recurrent_kernel') self.constant_kernel = self.add_weight( shape=(constant_shape[-1], self.units), initializer='uniform', name='constant_kernel') self.built = True def call(self, inputs, states, constants): [prev_output] = states [constant] = constants h_input = keras.backend.dot(inputs, self.input_kernel) h_state = keras.backend.dot(prev_output, self.recurrent_kernel) h_const = keras.backend.dot(constant, self.constant_kernel) output = h_input + h_state + h_const return output, [output] def get_config(self): config = {'units': self.units} base_config = super(RNNCellWithConstants, self).get_config() return dict(list(base_config.items()) + list(config.items())) # Test basic case. 
x = keras.Input((None, 5)) c = keras.Input((3,)) s = keras.Input((32,)) cell = RNNCellWithConstants(32) layer = recurrent.RNN(cell) y = layer(x, initial_state=s, constants=c) model = keras.models.Model([x, s, c], y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))], np.zeros((6, 32)) ) # Test basic case serialization. x_np = np.random.random((6, 5, 5)) s_np = np.random.random((6, 32)) c_np = np.random.random((6, 3)) y_np = model.predict([x_np, s_np, c_np]) weights = model.get_weights() config = layer.get_config() custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} with keras.utils.CustomObjectScope(custom_objects): layer = recurrent.RNN.from_config(config.copy()) y = layer(x, initial_state=s, constants=c) model = keras.models.Model([x, s, c], y) model.set_weights(weights) y_np_2 = model.predict([x_np, s_np, c_np]) assert_allclose(y_np, y_np_2, atol=1e-4) # verify that state is used y_np_2_different_s = model.predict([x_np, s_np + 10., c_np]) with pytest.raises(AssertionError): assert_allclose(y_np, y_np_2_different_s, atol=1e-4) # test flat list inputs with keras.utils.CustomObjectScope(custom_objects): layer = recurrent.RNN.from_config(config.copy()) y = layer([x, s, c]) model = keras.models.Model([x, s, c], y) model.set_weights(weights) y_np_3 = model.predict([x_np, s_np, c_np]) assert_allclose(y_np, y_np_3, atol=1e-4) @rnn_test def test_rnn_cell_identity_initializer(layer_class): inputs = Input(shape=(timesteps, embedding_dim)) layer = layer_class(units, recurrent_initializer='identity') layer(inputs) recurrent_kernel = layer.get_weights()[1] num_kernels = recurrent_kernel.shape[1] // recurrent_kernel.shape[0] assert np.array_equal(recurrent_kernel, np.concatenate([np.identity(units)] * num_kernels, axis=1)) @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') def test_inconsistent_output_state_size(): class PlusOneRNNCell(keras.layers.Layer): """Add one to 
the input and state. This cell is used for testing state_size and output_size.""" def __init__(self, num_unit, **kwargs): self.state_size = num_unit super(PlusOneRNNCell, self).__init__(**kwargs) def build(self, input_shape): self.output_size = input_shape[-1] def call(self, inputs, states): return inputs + 1, [states[0] + 1] batch = 32 time_step = 4 state_size = 5 input_size = 6 cell = PlusOneRNNCell(state_size) x = keras.Input((None, input_size)) layer = recurrent.RNN(cell) y = layer(x) assert cell.state_size == state_size init_state = layer.get_initial_state(x) assert len(init_state) == 1 if K.backend() != 'theano': # theano does not support static shape inference. assert K.int_shape(init_state[0]) == (None, state_size) model = keras.models.Model(x, y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( np.zeros((batch, time_step, input_size)), np.zeros((batch, input_size))) assert model.output_shape == (None, input_size) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/merge_test.py0000644000000000116100000002204213354530144020601 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose from keras import layers from keras import models from keras import backend as K from keras.utils.test_utils import layer_test from keras.layers import merge def test_merge_add(): i1 = layers.Input(shape=(4, 5)) i2 = layers.Input(shape=(4, 5)) i3 = layers.Input(shape=(4, 5)) o = layers.add([i1, i2, i3]) assert o._keras_shape == (None, 4, 5) model = models.Model([i1, i2, i3], o) add_layer = layers.Add() o2 = add_layer([i1, i2, i3]) assert add_layer.output_shape == (None, 4, 5) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) x3 = np.random.random((2, 4, 5)) out = model.predict([x1, x2, x3]) assert out.shape == (2, 4, 5) assert_allclose(out, x1 + x2 + x3, atol=1e-4) assert add_layer.compute_mask([i1, i2, i3], [None, None, None]) is None assert 
np.all(K.eval(add_layer.compute_mask( [i1, i2, i3], [K.variable(x1), K.variable(x2), K.variable(x3)]))) # Test invalid use case with pytest.raises(ValueError): add_layer.compute_mask([i1, i2, i3], x1) with pytest.raises(ValueError): add_layer.compute_mask(i1, [None, None, None]) with pytest.raises(ValueError): add_layer.compute_mask([i1, i2, i3], [None, None]) def test_merge_subtract(): i1 = layers.Input(shape=(4, 5)) i2 = layers.Input(shape=(4, 5)) i3 = layers.Input(shape=(4, 5)) i4 = layers.Input(shape=(3, 5)) o = layers.subtract([i1, i2]) assert o._keras_shape == (None, 4, 5) model = models.Model([i1, i2], o) subtract_layer = layers.Subtract() o2 = subtract_layer([i1, i2]) assert subtract_layer.output_shape == (None, 4, 5) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) out = model.predict([x1, x2]) assert out.shape == (2, 4, 5) assert_allclose(out, x1 - x2, atol=1e-4) assert subtract_layer.compute_mask([i1, i2], [None, None]) is None assert np.all(K.eval(subtract_layer.compute_mask( [i1, i2], [K.variable(x1), K.variable(x2)]))) # Test invalid use case with pytest.raises(ValueError): subtract_layer.compute_mask([i1, i2], x1) with pytest.raises(ValueError): subtract_layer.compute_mask(i1, [None, None]) with pytest.raises(ValueError): subtract_layer([i1, i2, i3]) with pytest.raises(ValueError): subtract_layer([i1]) def test_merge_multiply(): i1 = layers.Input(shape=(4, 5)) i2 = layers.Input(shape=(4, 5)) i3 = layers.Input(shape=(4, 5)) o = layers.multiply([i1, i2, i3]) assert o._keras_shape == (None, 4, 5) model = models.Model([i1, i2, i3], o) mul_layer = layers.Multiply() o2 = mul_layer([i1, i2, i3]) assert mul_layer.output_shape == (None, 4, 5) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) x3 = np.random.random((2, 4, 5)) out = model.predict([x1, x2, x3]) assert out.shape == (2, 4, 5) assert_allclose(out, x1 * x2 * x3, atol=1e-4) def test_merge_average(): i1 = layers.Input(shape=(4, 5)) i2 = layers.Input(shape=(4, 5)) o = 
layers.average([i1, i2]) assert o._keras_shape == (None, 4, 5) model = models.Model([i1, i2], o) avg_layer = layers.Average() o2 = avg_layer([i1, i2]) assert avg_layer.output_shape == (None, 4, 5) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) out = model.predict([x1, x2]) assert out.shape == (2, 4, 5) assert_allclose(out, 0.5 * (x1 + x2), atol=1e-4) def test_merge_maximum(): i1 = layers.Input(shape=(4, 5)) i2 = layers.Input(shape=(4, 5)) o = layers.maximum([i1, i2]) assert o._keras_shape == (None, 4, 5) model = models.Model([i1, i2], o) max_layer = layers.Maximum() o2 = max_layer([i1, i2]) assert max_layer.output_shape == (None, 4, 5) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) out = model.predict([x1, x2]) assert out.shape == (2, 4, 5) assert_allclose(out, np.maximum(x1, x2), atol=1e-4) def test_merge_minimum(): i1 = layers.Input(shape=(4, 5)) i2 = layers.Input(shape=(4, 5)) o = layers.minimum([i1, i2]) assert o._keras_shape == (None, 4, 5) model = models.Model([i1, i2], o) max_layer = layers.Minimum() o2 = max_layer([i1, i2]) assert max_layer.output_shape == (None, 4, 5) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) out = model.predict([x1, x2]) assert out.shape == (2, 4, 5) assert_allclose(out, np.minimum(x1, x2), atol=1e-4) def test_merge_concatenate(): i1 = layers.Input(shape=(None, 5)) i2 = layers.Input(shape=(None, 5)) o = layers.concatenate([i1, i2], axis=1) assert o._keras_shape == (None, None, 5) model = models.Model([i1, i2], o) i1 = layers.Input(shape=(4, 5)) i2 = layers.Input(shape=(4, 5)) o = layers.concatenate([i1, i2], axis=1) assert o._keras_shape == (None, 8, 5) model = models.Model([i1, i2], o) concat_layer = layers.Concatenate(axis=1) o2 = concat_layer([i1, i2]) assert concat_layer.output_shape == (None, 8, 5) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) out = model.predict([x1, x2]) assert out.shape == (2, 8, 5) assert_allclose(out, np.concatenate([x1, x2], 
axis=1), atol=1e-4) x3 = np.random.random((1, 1, 1)) nb_layers = 4 x_i = layers.Input(shape=(None, None)) x_list = [x_i] x = x_i for i in range(nb_layers): x_list.append(x) x = layers.concatenate(x_list, axis=1) concat_model = models.Model(x_i, x) concat_out = concat_model.predict([x3]) x3 = np.repeat(x3, 16, axis=1) assert concat_out.shape == (1, 16, 1) assert_allclose(concat_out, x3) assert concat_layer.compute_mask([i1, i2], [None, None]) is None assert np.all(K.eval(concat_layer.compute_mask( [i1, i2], [K.variable(x1), K.variable(x2)])).reshape(-1)) # Test invalid use case with pytest.raises(ValueError): concat_layer.compute_mask([i1, i2], x1) with pytest.raises(ValueError): concat_layer.compute_mask(i1, [None, None]) with pytest.raises(ValueError): concat_layer.compute_mask([i1, i2], [None]) with pytest.raises(ValueError): concat_layer([i1]) def test_merge_dot(): i1 = layers.Input(shape=(4,)) i2 = layers.Input(shape=(4,)) o = layers.dot([i1, i2], axes=1) assert o._keras_shape == (None, 1) model = models.Model([i1, i2], o) dot_layer = layers.Dot(axes=1) o2 = dot_layer([i1, i2]) assert dot_layer.output_shape == (None, 1) x1 = np.random.random((2, 4)) x2 = np.random.random((2, 4)) out = model.predict([x1, x2]) assert out.shape == (2, 1) expected = np.zeros((2, 1)) expected[0, 0] = np.dot(x1[0], x2[0]) expected[1, 0] = np.dot(x1[1], x2[1]) assert_allclose(out, expected, atol=1e-4) # Test with negative tuple of axes. 
o = layers.dot([i1, i2], axes=(-1, -1)) assert o._keras_shape == (None, 1) model = models.Model([i1, i2], o) out = model.predict([x1, x2]) assert out.shape == (2, 1) assert_allclose(out, expected, atol=1e-4) def test_merge_broadcast(): # shapes provided i1 = layers.Input(shape=(4, 5)) i2 = layers.Input(shape=(5,)) ops = [layers.add, layers.maximum] for op in ops: o = op([i1, i2]) assert o._keras_shape == (None, 4, 5) model = models.Model([i1, i2], o) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 5)) out = model.predict([x1, x2]) assert out.shape == (2, 4, 5) # shapes not provided i1 = layers.Input(shape=(None, None)) i2 = layers.Input(shape=(None,)) ops = [layers.add, layers.maximum] for op in ops: o = op([i1, i2]) assert o._keras_shape == (None, None, None) model = models.Model([i1, i2], o) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 5)) out = model.predict([x1, x2]) assert out.shape == (2, 4, 5) # ndim not provided if K.backend() == 'tensorflow': k_ndim = K.ndim K.ndim = lambda _: None i1 = layers.Input(shape=(None, None)) i2 = layers.Input(shape=(None,)) ops = [layers.add, layers.maximum] for op in ops: o = op([i1, i2]) assert o._keras_shape == (None, None, None) model = models.Model([i1, i2], o) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 5)) out = model.predict([x1, x2]) assert out.shape == (2, 4, 5) K.ndim = k_ndim def test_masking_concatenate(): input1 = layers.Input(shape=(6,)) input2 = layers.Input(shape=(6,)) x1 = layers.Embedding(10, 5, input_length=6, mask_zero=True)(input1) x2 = layers.Embedding(10, 5, input_length=6, mask_zero=True)(input2) x = layers.concatenate([x1, x2]) x = layers.wrappers.TimeDistributed(layers.Dense(3, activation='softmax'))(x) models.Model(inputs=[input1, input2], outputs=[x]) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/local_test.py0000644000000000116100000000373113354530144020600 0ustar rooteng00000000000000import pytest from 
keras.utils.test_utils import layer_test from keras.layers import local def test_locallyconnected_1d(): num_samples = 2 num_steps = 8 input_dim = 5 filter_length = 3 filters = 4 padding = 'valid' strides = 1 layer_test(local.LocallyConnected1D, kwargs={'filters': filters, 'kernel_size': filter_length, 'padding': padding, 'kernel_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'strides': strides}, input_shape=(num_samples, num_steps, input_dim)) def test_locallyconnected_2d(): num_samples = 5 filters = 3 stack_size = 4 num_row = 6 num_col = 8 padding = 'valid' for strides in [(1, 1), (2, 2)]: layer_test(local.LocallyConnected2D, kwargs={'filters': filters, 'kernel_size': 3, 'padding': padding, 'kernel_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'strides': strides, 'data_format': 'channels_last'}, input_shape=(num_samples, num_row, num_col, stack_size)) layer_test(local.LocallyConnected2D, kwargs={'filters': filters, 'kernel_size': (3, 3), 'padding': padding, 'kernel_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'strides': strides, 'data_format': 'channels_first'}, input_shape=(num_samples, stack_size, num_row, num_col)) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/embeddings_test.py0000644000000000116100000000330213354530144021601 0ustar rooteng00000000000000import pytest from keras.utils.test_utils import layer_test from keras.layers.embeddings import Embedding from keras.models import Sequential import keras.backend as K def test_embedding(): layer_test(Embedding, kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2}, input_shape=(3, 2), input_dtype='int32', expected_output_dtype=K.floatx()) layer_test(Embedding, kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True}, input_shape=(3, 2), input_dtype='int32', expected_output_dtype=K.floatx()) layer_test(Embedding, kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True}, 
input_shape=(3, 2, 5), input_dtype='int32', expected_output_dtype=K.floatx()) layer_test(Embedding, kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True, 'input_length': (None, 5)}, input_shape=(3, 2, 5), input_dtype='int32', expected_output_dtype=K.floatx()) def test_embedding_invalid(): # len(input_length) should be equal to len(input_shape) - 1 with pytest.raises(ValueError): model = Sequential([Embedding( input_dim=10, output_dim=4, input_length=2, input_shape=(3, 4, 5))]) # input_length should be equal to input_shape[1:] with pytest.raises(ValueError): model = Sequential([Embedding( input_dim=10, output_dim=4, input_length=2, input_shape=(3, 5))]) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/normalization_test.py0000644000000000116100000001753513354530144022403 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose from keras.layers import Input from keras import regularizers from keras.utils.test_utils import layer_test from keras.layers import normalization from keras.models import Sequential, Model from keras import backend as K input_1 = np.arange(10) input_2 = np.zeros(10) input_3 = np.ones((10)) input_4 = np.expand_dims(np.arange(10.), axis=1) input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))] def test_basic_batchnorm(): layer_test(normalization.BatchNormalization, kwargs={'momentum': 0.9, 'epsilon': 0.1, 'gamma_regularizer': regularizers.l2(0.01), 'beta_regularizer': regularizers.l2(0.01)}, input_shape=(3, 4, 2)) layer_test(normalization.BatchNormalization, kwargs={'momentum': 0.9, 'epsilon': 0.1, 'axis': 1}, input_shape=(1, 4, 1)) layer_test(normalization.BatchNormalization, kwargs={'gamma_initializer': 'ones', 'beta_initializer': 'ones', 'moving_mean_initializer': 'zeros', 'moving_variance_initializer': 'ones'}, input_shape=(3, 4, 2, 4)) if K.backend() != 'theano': layer_test(normalization.BatchNormalization, kwargs={'momentum': 0.9, 'epsilon': 0.1, 
'axis': 1, 'scale': False, 'center': False}, input_shape=(3, 4, 2, 4)) def test_batchnorm_correctness_1d(): np.random.seed(1337) model = Sequential() norm = normalization.BatchNormalization(input_shape=(10,), momentum=0.8) model.add(norm) model.compile(loss='mse', optimizer='rmsprop') # centered on 5.0, variance 10.0 x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10)) model.fit(x, x, epochs=5, verbose=0) out = model.predict(x) out -= K.eval(norm.beta) out /= K.eval(norm.gamma) assert_allclose(out.mean(), 0.0, atol=1e-1) assert_allclose(out.std(), 1.0, atol=1e-1) def test_batchnorm_correctness_2d(): np.random.seed(1337) model = Sequential() norm = normalization.BatchNormalization(axis=1, input_shape=(10, 6), momentum=0.8) model.add(norm) model.compile(loss='mse', optimizer='rmsprop') # centered on 5.0, variance 10.0 x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10, 6)) model.fit(x, x, epochs=5, verbose=0) out = model.predict(x) out -= np.reshape(K.eval(norm.beta), (1, 10, 1)) out /= np.reshape(K.eval(norm.gamma), (1, 10, 1)) assert_allclose(out.mean(axis=(0, 2)), 0.0, atol=1.1e-1) assert_allclose(out.std(axis=(0, 2)), 1.0, atol=1.1e-1) def test_batchnorm_training_argument(): np.random.seed(1337) bn1 = normalization.BatchNormalization(input_shape=(10,)) x1 = Input(shape=(10,)) y1 = bn1(x1, training=True) assert bn1.updates model1 = Model(x1, y1) x = np.random.normal(loc=5.0, scale=10.0, size=(20, 10)) output_a = model1.predict(x) model1.compile(loss='mse', optimizer='rmsprop') model1.fit(x, x, epochs=1, verbose=0) output_b = model1.predict(x) assert np.abs(np.sum(output_a - output_b)) > 0.1 assert_allclose(output_b.mean(), 0.0, atol=1e-1) assert_allclose(output_b.std(), 1.0, atol=1e-1) bn2 = normalization.BatchNormalization(input_shape=(10,)) x2 = Input(shape=(10,)) bn2(x2, training=False) assert not bn2.updates def test_batchnorm_mode_twice(): # This is a regression test for issue #4881 with the old # batch normalization functions in the Theano 
backend. model = Sequential() model.add(normalization.BatchNormalization(input_shape=(10, 5, 5), axis=1)) model.add(normalization.BatchNormalization(input_shape=(10, 5, 5), axis=1)) model.compile(loss='mse', optimizer='sgd') x = np.random.normal(loc=5.0, scale=10.0, size=(20, 10, 5, 5)) model.fit(x, x, epochs=1, verbose=0) model.predict(x) def test_batchnorm_convnet(): np.random.seed(1337) model = Sequential() norm = normalization.BatchNormalization(axis=1, input_shape=(3, 4, 4), momentum=0.8) model.add(norm) model.compile(loss='mse', optimizer='sgd') # centered on 5.0, variance 10.0 x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) model.fit(x, x, epochs=4, verbose=0) out = model.predict(x) out -= np.reshape(K.eval(norm.beta), (1, 3, 1, 1)) out /= np.reshape(K.eval(norm.gamma), (1, 3, 1, 1)) assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) @pytest.mark.skipif((K.backend() == 'theano'), reason='Bug with theano backend') def test_batchnorm_convnet_no_center_no_scale(): np.random.seed(1337) model = Sequential() norm = normalization.BatchNormalization(axis=-1, center=False, scale=False, input_shape=(3, 4, 4), momentum=0.8) model.add(norm) model.compile(loss='mse', optimizer='sgd') # centered on 5.0, variance 10.0 x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) model.fit(x, x, epochs=4, verbose=0) out = model.predict(x) assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) def test_shared_batchnorm(): '''Test that a BN layer can be shared across different data streams. 
''' # Test single layer reuse bn = normalization.BatchNormalization(input_shape=(10,)) x1 = Input(shape=(10,)) bn(x1) x2 = Input(shape=(10,)) y2 = bn(x2) x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10)) model = Model(x2, y2) assert len(model.updates) == 2 model.compile('sgd', 'mse') model.train_on_batch(x, x) # Test model-level reuse x3 = Input(shape=(10,)) y3 = model(x3) new_model = Model(x3, y3) assert len(model.updates) == 2 new_model.compile('sgd', 'mse') new_model.train_on_batch(x, x) def test_that_trainable_disables_updates(): val_a = np.random.random((10, 4)) val_out = np.random.random((10, 4)) a = Input(shape=(4,)) layer = normalization.BatchNormalization(input_shape=(4,)) b = layer(a) model = Model(a, b) model.trainable = False assert not model.updates model.compile('sgd', 'mse') assert not model.updates x1 = model.predict(val_a) model.train_on_batch(val_a, val_out) x2 = model.predict(val_a) assert_allclose(x1, x2, atol=1e-7) model.trainable = True model.compile('sgd', 'mse') assert model.updates model.train_on_batch(val_a, val_out) x2 = model.predict(val_a) assert np.abs(np.sum(x1 - x2)) > 1e-5 layer.trainable = False model.compile('sgd', 'mse') assert not model.updates x1 = model.predict(val_a) model.train_on_batch(val_a, val_out) x2 = model.predict(val_a) assert_allclose(x1, x2, atol=1e-7) def test_batchnorm_trainable(): bn_mean = 0.5 bn_std = 10. def get_model(bn_mean, bn_std): input = Input(shape=(1,)) x = normalization.BatchNormalization()(input) model = Model(input, x) model.set_weights([np.array([1.]), np.array([0.]), np.array([bn_mean]), np.array([bn_std ** 2])]) return model # Simulates training-mode with trainable layer. Should use mini-batch statistics. 
K.set_learning_phase(1) model = get_model(bn_mean, bn_std) model.compile(loss='mse', optimizer='rmsprop') out = model.predict(input_4) assert_allclose((input_4 - np.mean(input_4)) / np.std(input_4), out, atol=1e-3) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/convolutional_recurrent_test.py0000644000000000116100000001366613354530144024503 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose from keras import backend as K from keras.models import Sequential, Model from keras.layers import convolutional_recurrent, Input from keras.utils.test_utils import layer_test from keras import regularizers num_row = 3 num_col = 3 filters = 2 num_samples = 1 input_channel = 2 input_num_row = 5 input_num_col = 5 sequence_len = 2 def test_convolutional_recurrent(): for data_format in ['channels_first', 'channels_last']: if data_format == 'channels_first': inputs = np.random.rand(num_samples, sequence_len, input_channel, input_num_row, input_num_col) else: inputs = np.random.rand(num_samples, sequence_len, input_num_row, input_num_col, input_channel) for return_sequences in [True, False]: # test for return state: x = Input(batch_shape=inputs.shape) kwargs = {'data_format': data_format, 'return_sequences': return_sequences, 'return_state': True, 'stateful': True, 'filters': filters, 'kernel_size': (num_row, num_col), 'padding': 'valid'} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) layer.build(inputs.shape) outputs = layer(x) output, states = outputs[0], outputs[1:] assert len(states) == 2 model = Model(x, states[0]) state = model.predict(inputs) np.testing.assert_allclose( K.eval(layer.states[0]), state, atol=1e-4) # test for output shape: output = layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, num_col), 'padding': 'valid'}, input_shape=inputs.shape) def 
test_convolutional_recurrent_statefulness(): data_format = 'channels_last' return_sequences = False inputs = np.random.rand(num_samples, sequence_len, input_num_row, input_num_col, input_channel) # Tests for statefulness model = Sequential() kwargs = {'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, num_col), 'stateful': True, 'batch_input_shape': inputs.shape, 'padding': 'same'} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones_like(inputs)) # train once so that the states change model.train_on_batch(np.ones_like(inputs), np.random.random(out1.shape)) out2 = model.predict(np.ones_like(inputs)) # if the state is not reset, output should be different assert(out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones_like(inputs)) assert(out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones_like(inputs)) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones_like(inputs)) assert(out4.max() != out5.max()) # cntk doesn't support eval convolution with static # variable, will enable it later if K.backend() != 'cntk': # check regularizers kwargs = {'data_format': data_format, 'return_sequences': return_sequences, 'kernel_size': (num_row, num_col), 'stateful': True, 'filters': filters, 'batch_input_shape': inputs.shape, 'kernel_regularizer': regularizers.L1L2(l1=0.01), 'recurrent_regularizer': regularizers.L1L2(l1=0.01), 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'kernel_constraint': 'max_norm', 'recurrent_constraint': 'max_norm', 'bias_constraint': 'max_norm', 'padding': 'same'} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) layer.build(inputs.shape) 
assert len(layer.losses) == 3 assert layer.activity_regularizer output = layer(K.variable(np.ones(inputs.shape))) assert len(layer.losses) == 4 K.eval(output) # check dropout layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, num_col), 'padding': 'same', 'dropout': 0.1, 'recurrent_dropout': 0.1}, input_shape=inputs.shape) # check state initialization layer = convolutional_recurrent.ConvLSTM2D( filters=filters, kernel_size=(num_row, num_col), data_format=data_format, return_sequences=return_sequences) layer.build(inputs.shape) x = Input(batch_shape=inputs.shape) initial_state = layer.get_initial_state(x) y = layer(x, initial_state=initial_state) model = Model(x, y) assert (model.predict(inputs).shape == layer.compute_output_shape(inputs.shape)) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/wrappers_test.py0000644000000000116100000006152413354530144021355 0ustar rooteng00000000000000import pytest import numpy as np import copy from numpy.testing import assert_allclose from keras.utils import CustomObjectScope from keras.layers import wrappers, Input, Layer from keras.layers import RNN from keras import layers from keras.models import Sequential, Model, model_from_json from keras import backend as K from keras.utils.generic_utils import object_list_uid, to_list def test_TimeDistributed(): # first, test with Dense layer model = Sequential() model.add(wrappers.TimeDistributed(layers.Dense(2), input_shape=(3, 4))) model.add(layers.Activation('relu')) model.compile(optimizer='rmsprop', loss='mse') model.fit(np.random.random((10, 3, 4)), np.random.random((10, 3, 2)), epochs=1, batch_size=10) # test config model.get_config() # test when specifying a batch_input_shape test_input = np.random.random((1, 3, 4)) test_output = model.predict(test_input) weights = model.layers[0].get_weights() reference = Sequential() 
reference.add(wrappers.TimeDistributed(layers.Dense(2), batch_input_shape=(1, 3, 4))) reference.add(layers.Activation('relu')) reference.compile(optimizer='rmsprop', loss='mse') reference.layers[0].set_weights(weights) reference_output = reference.predict(test_input) assert_allclose(test_output, reference_output, atol=1e-05) # test with Embedding model = Sequential() model.add(wrappers.TimeDistributed(layers.Embedding(5, 6), batch_input_shape=(10, 3, 4), dtype='int32')) model.compile(optimizer='rmsprop', loss='mse') model.fit(np.random.randint(5, size=(10, 3, 4), dtype='int32'), np.random.random((10, 3, 4, 6)), epochs=1, batch_size=10) # compare to not using batch_input_shape test_input = np.random.randint(5, size=(10, 3, 4), dtype='int32') test_output = model.predict(test_input) weights = model.layers[0].get_weights() reference = Sequential() reference.add(wrappers.TimeDistributed(layers.Embedding(5, 6), input_shape=(3, 4), dtype='int32')) reference.compile(optimizer='rmsprop', loss='mse') reference.layers[0].set_weights(weights) reference_output = reference.predict(test_input) assert_allclose(test_output, reference_output, atol=1e-05) # test with Conv2D model = Sequential() model.add(wrappers.TimeDistributed(layers.Conv2D(5, (2, 2), padding='same'), input_shape=(2, 4, 4, 3))) model.add(layers.Activation('relu')) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch(np.random.random((1, 2, 4, 4, 3)), np.random.random((1, 2, 4, 4, 5))) model = model_from_json(model.to_json()) model.summary() # test stacked layers model = Sequential() model.add(wrappers.TimeDistributed(layers.Dense(2), input_shape=(3, 4))) model.add(wrappers.TimeDistributed(layers.Dense(3))) model.add(layers.Activation('relu')) model.compile(optimizer='rmsprop', loss='mse') model.fit(np.random.random((10, 3, 4)), np.random.random((10, 3, 3)), epochs=1, batch_size=10) # test wrapping Sequential model model = Sequential() model.add(layers.Dense(3, input_dim=2)) outer_model = Sequential() 
outer_model.add(wrappers.TimeDistributed(model, input_shape=(3, 2))) outer_model.compile(optimizer='rmsprop', loss='mse') outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), epochs=1, batch_size=10) # test with functional API x = Input(shape=(3, 2)) y = wrappers.TimeDistributed(model)(x) outer_model = Model(x, y) outer_model.compile(optimizer='rmsprop', loss='mse') outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), epochs=1, batch_size=10) # test with BatchNormalization model = Sequential() model.add(wrappers.TimeDistributed( layers.BatchNormalization(center=True, scale=True), name='bn', input_shape=(10, 2))) model.compile(optimizer='rmsprop', loss='mse') # Assert that mean and variance are 0 and 1. td = model.layers[0] assert np.array_equal(td.get_weights()[2], np.array([0, 0])) assert np.array_equal(td.get_weights()[3], np.array([1, 1])) # Train model.train_on_batch(np.random.normal(loc=2, scale=2, size=(1, 10, 2)), np.broadcast_to(np.array([0, 1]), (1, 10, 2))) # Assert that mean and variance changed. assert not np.array_equal(td.get_weights()[2], np.array([0, 0])) assert not np.array_equal(td.get_weights()[3], np.array([1, 1])) # Verify input_map has one mapping from inputs to reshaped inputs. 
uid = object_list_uid(model.inputs) assert len(td._input_map.keys()) == 1 assert uid in td._input_map assert K.int_shape(td._input_map[uid]) == (None, 2) @pytest.mark.skipif((K.backend() == 'cntk'), reason='Flaky with CNTK backend') def test_TimeDistributed_learning_phase(): # test layers that need learning_phase to be set np.random.seed(1234) x = Input(shape=(3, 2)) y = wrappers.TimeDistributed(layers.Dropout(.999))(x, training=True) model = Model(x, y) y = model.predict(np.random.random((10, 3, 2))) assert_allclose(np.mean(y), 0., atol=1e-1, rtol=1e-1) def test_TimeDistributed_trainable(): # test layers that need learning_phase to be set x = Input(shape=(3, 2)) layer = wrappers.TimeDistributed(layers.BatchNormalization()) _ = layer(x) assert len(layer.updates) == 2 assert len(layer.trainable_weights) == 2 layer.trainable = False assert len(layer.updates) == 0 assert len(layer.trainable_weights) == 0 layer.trainable = True assert len(layer.updates) == 2 assert len(layer.trainable_weights) == 2 @pytest.mark.skipif((K.backend() == 'cntk'), reason='Unknown timestamps for RNN not supported in CNTK.') def test_TimeDistributed_with_masked_embedding_and_unspecified_shape(): # test with unspecified shape and Embeddings with mask_zero model = Sequential() model.add(wrappers.TimeDistributed(layers.Embedding(5, 6, mask_zero=True), input_shape=(None, None))) # the shape so far: (N, t_1, t_2, 6) model.add(wrappers.TimeDistributed(layers.SimpleRNN(7, return_sequences=True))) model.add(wrappers.TimeDistributed(layers.SimpleRNN(8, return_sequences=False))) model.add(layers.SimpleRNN(1, return_sequences=False)) model.compile(optimizer='rmsprop', loss='mse') model_input = np.random.randint(low=1, high=5, size=(10, 3, 4), dtype='int32') for i in range(4): model_input[i, i:, i:] = 0 model.fit(model_input, np.random.random((10, 1)), epochs=1, batch_size=10) mask_outputs = [model.layers[0].compute_mask(model.input)] for layer in model.layers[1:]: 
mask_outputs.append(layer.compute_mask(layer.input, mask_outputs[-1])) func = K.function([model.input], mask_outputs[:-1]) mask_outputs_val = func([model_input]) ref_mask_val_0 = model_input > 0 # embedding layer ref_mask_val_1 = ref_mask_val_0 # first RNN layer ref_mask_val_2 = np.any(ref_mask_val_1, axis=-1) # second RNN layer ref_mask_val = [ref_mask_val_0, ref_mask_val_1, ref_mask_val_2] for i in range(3): assert np.array_equal(mask_outputs_val[i], ref_mask_val[i]) assert mask_outputs[-1] is None # final layer def test_TimeDistributed_with_masking_layer(): # test with Masking layer model = Sequential() model.add(wrappers.TimeDistributed(layers.Masking(mask_value=0.,), input_shape=(None, 4))) model.add(wrappers.TimeDistributed(layers.Dense(5))) model.compile(optimizer='rmsprop', loss='mse') model_input = np.random.randint(low=1, high=5, size=(10, 3, 4)) for i in range(4): model_input[i, i:, :] = 0. model.compile(optimizer='rmsprop', loss='mse') model.fit(model_input, np.random.random((10, 3, 5)), epochs=1, batch_size=6) mask_outputs = [model.layers[0].compute_mask(model.input)] mask_outputs += [model.layers[1].compute_mask(model.layers[1].input, mask_outputs[-1])] func = K.function([model.input], mask_outputs) mask_outputs_val = func([model_input]) assert np.array_equal(mask_outputs_val[0], np.any(model_input, axis=-1)) assert np.array_equal(mask_outputs_val[1], np.any(model_input, axis=-1)) def test_regularizers(): model = Sequential() model.add(wrappers.TimeDistributed( layers.Dense(2, kernel_regularizer='l1'), input_shape=(3, 4))) model.add(layers.Activation('relu')) model.compile(optimizer='rmsprop', loss='mse') assert len(model.layers[0].layer.losses) == 1 assert len(model.layers[0].losses) == 1 assert len(model.layers[0].get_losses_for(None)) == 1 assert len(model.losses) == 1 model = Sequential() model.add(wrappers.TimeDistributed( layers.Dense(2, activity_regularizer='l1'), input_shape=(3, 4))) model.add(layers.Activation('relu')) 
model.compile(optimizer='rmsprop', loss='mse') assert len(model.losses) == 1 def test_Bidirectional(): rnn = layers.SimpleRNN samples = 2 dim = 2 timesteps = 2 output_dim = 2 dropout_rate = 0.2 for mode in ['sum', 'concat']: x = np.random.random((samples, timesteps, dim)) target_dim = 2 * output_dim if mode == 'concat' else output_dim y = np.random.random((samples, target_dim)) # test with Sequential model model = Sequential() model.add(wrappers.Bidirectional(rnn(output_dim, dropout=dropout_rate, recurrent_dropout=dropout_rate), merge_mode=mode, input_shape=(timesteps, dim))) model.compile(loss='mse', optimizer='sgd') model.fit(x, y, epochs=1, batch_size=1) # test config model.get_config() model = model_from_json(model.to_json()) model.summary() # test stacked bidirectional layers model = Sequential() model.add(wrappers.Bidirectional(rnn(output_dim, return_sequences=True), merge_mode=mode, input_shape=(timesteps, dim))) model.add(wrappers.Bidirectional(rnn(output_dim), merge_mode=mode)) model.compile(loss='mse', optimizer='sgd') model.fit(x, y, epochs=1, batch_size=1) # test with functional API inputs = Input((timesteps, dim)) outputs = wrappers.Bidirectional(rnn(output_dim, dropout=dropout_rate, recurrent_dropout=dropout_rate), merge_mode=mode)(inputs) model = Model(inputs, outputs) model.compile(loss='mse', optimizer='sgd') model.fit(x, y, epochs=1, batch_size=1) # Bidirectional and stateful inputs = Input(batch_shape=(1, timesteps, dim)) outputs = wrappers.Bidirectional(rnn(output_dim, stateful=True), merge_mode=mode)(inputs) model = Model(inputs, outputs) model.compile(loss='mse', optimizer='sgd') model.fit(x, y, epochs=1, batch_size=1) @pytest.mark.skipif((K.backend() == 'cntk'), reason='Unknown timestamps not supported in CNTK.') def test_Bidirectional_dynamic_timesteps(): # test with functional API with dynamic length rnn = layers.SimpleRNN samples = 2 dim = 2 timesteps = 2 output_dim = 2 dropout_rate = 0.2 for mode in ['sum', 'concat']: x = 
np.random.random((samples, timesteps, dim)) target_dim = 2 * output_dim if mode == 'concat' else output_dim y = np.random.random((samples, target_dim)) inputs = Input((None, dim)) outputs = wrappers.Bidirectional(rnn(output_dim, dropout=dropout_rate, recurrent_dropout=dropout_rate), merge_mode=mode)(inputs) model = Model(inputs, outputs) model.compile(loss='mse', optimizer='sgd') model.fit(x, y, epochs=1, batch_size=1) @pytest.mark.parametrize('merge_mode', ['sum', 'mul', 'ave', 'concat', None]) def test_Bidirectional_merged_value(merge_mode): rnn = layers.LSTM samples = 2 dim = 5 timesteps = 3 units = 3 X = [np.random.rand(samples, timesteps, dim)] if merge_mode == 'sum': merge_func = lambda y, y_rev: y + y_rev elif merge_mode == 'mul': merge_func = lambda y, y_rev: y * y_rev elif merge_mode == 'ave': merge_func = lambda y, y_rev: (y + y_rev) / 2 elif merge_mode == 'concat': merge_func = lambda y, y_rev: np.concatenate((y, y_rev), axis=-1) else: merge_func = lambda y, y_rev: [y, y_rev] # basic case inputs = Input((timesteps, dim)) layer = wrappers.Bidirectional(rnn(units, return_sequences=True), merge_mode=merge_mode) f_merged = K.function([inputs], to_list(layer(inputs))) f_forward = K.function([inputs], [layer.forward_layer.call(inputs)]) f_backward = K.function([inputs], [K.reverse(layer.backward_layer.call(inputs), 1)]) y_merged = f_merged(X) y_expected = to_list(merge_func(f_forward(X)[0], f_backward(X)[0])) assert len(y_merged) == len(y_expected) for x1, x2 in zip(y_merged, y_expected): assert_allclose(x1, x2, atol=1e-5) # test return_state inputs = Input((timesteps, dim)) layer = wrappers.Bidirectional(rnn(units, return_state=True), merge_mode=merge_mode) f_merged = K.function([inputs], layer(inputs)) f_forward = K.function([inputs], layer.forward_layer.call(inputs)) f_backward = K.function([inputs], layer.backward_layer.call(inputs)) n_states = len(layer.layer.states) y_merged = f_merged(X) y_forward = f_forward(X) y_backward = f_backward(X) y_expected = 
to_list(merge_func(y_forward[0], y_backward[0])) assert len(y_merged) == len(y_expected) + n_states * 2 for x1, x2 in zip(y_merged, y_expected): assert_allclose(x1, x2, atol=1e-5) # test if the state of a BiRNN is the concatenation of the underlying RNNs y_merged = y_merged[-n_states * 2:] y_forward = y_forward[-n_states:] y_backward = y_backward[-n_states:] for state_birnn, state_inner in zip(y_merged, y_forward + y_backward): assert_allclose(state_birnn, state_inner, atol=1e-5) @pytest.mark.skipif(K.backend() == 'theano', reason='Not supported.') @pytest.mark.parametrize('merge_mode', ['sum', 'concat', None]) def test_Bidirectional_dropout(merge_mode): rnn = layers.LSTM samples = 2 dim = 5 timesteps = 3 units = 3 X = [np.random.rand(samples, timesteps, dim)] inputs = Input((timesteps, dim)) wrapped = wrappers.Bidirectional(rnn(units, dropout=0.2, recurrent_dropout=0.2), merge_mode=merge_mode) outputs = to_list(wrapped(inputs, training=True)) assert all(not getattr(x, '_uses_learning_phase') for x in outputs) inputs = Input((timesteps, dim)) wrapped = wrappers.Bidirectional(rnn(units, dropout=0.2, return_state=True), merge_mode=merge_mode) outputs = to_list(wrapped(inputs)) assert all(x._uses_learning_phase for x in outputs) model = Model(inputs, outputs) assert model.uses_learning_phase y1 = to_list(model.predict(X)) y2 = to_list(model.predict(X)) for x1, x2 in zip(y1, y2): assert_allclose(x1, x2, atol=1e-5) def test_Bidirectional_state_reuse(): rnn = layers.LSTM samples = 2 dim = 5 timesteps = 3 units = 3 input1 = Input((timesteps, dim)) layer = wrappers.Bidirectional(rnn(units, return_state=True, return_sequences=True)) state = layer(input1)[1:] # test passing invalid initial_state: passing a tensor input2 = Input((timesteps, dim)) with pytest.raises(ValueError): output = wrappers.Bidirectional(rnn(units))(input2, initial_state=state[0]) # test valid usage: passing a list output = wrappers.Bidirectional(rnn(units))(input2, initial_state=state) model = 
Model([input1, input2], output) assert len(model.layers) == 4 assert isinstance(model.layers[-1].input, list) inputs = [np.random.rand(samples, timesteps, dim), np.random.rand(samples, timesteps, dim)] outputs = model.predict(inputs) def test_Bidirectional_with_constants(): class RNNCellWithConstants(Layer): def __init__(self, units, **kwargs): self.units = units self.state_size = units super(RNNCellWithConstants, self).__init__(**kwargs) def build(self, input_shape): if not isinstance(input_shape, list): raise TypeError('expects constants shape') [input_shape, constant_shape] = input_shape # will (and should) raise if more than one constant passed self.input_kernel = self.add_weight( shape=(input_shape[-1], self.units), initializer='uniform', name='kernel') self.recurrent_kernel = self.add_weight( shape=(self.units, self.units), initializer='uniform', name='recurrent_kernel') self.constant_kernel = self.add_weight( shape=(constant_shape[-1], self.units), initializer='uniform', name='constant_kernel') self.built = True def call(self, inputs, states, constants): [prev_output] = states [constant] = constants h_input = K.dot(inputs, self.input_kernel) h_state = K.dot(prev_output, self.recurrent_kernel) h_const = K.dot(constant, self.constant_kernel) output = h_input + h_state + h_const return output, [output] def get_config(self): config = {'units': self.units} base_config = super(RNNCellWithConstants, self).get_config() return dict(list(base_config.items()) + list(config.items())) # Test basic case. x = Input((5, 5)) c = Input((3,)) cell = RNNCellWithConstants(32) custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} with CustomObjectScope(custom_objects): layer = wrappers.Bidirectional(RNN(cell)) y = layer(x, constants=c) model = Model([x, c], y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( [np.zeros((6, 5, 5)), np.zeros((6, 3))], np.zeros((6, 64)) ) # Test basic case serialization. 
x_np = np.random.random((6, 5, 5)) c_np = np.random.random((6, 3)) y_np = model.predict([x_np, c_np]) weights = model.get_weights() config = layer.get_config() with CustomObjectScope(custom_objects): layer = wrappers.Bidirectional.from_config(copy.deepcopy(config)) y = layer(x, constants=c) model = Model([x, c], y) model.set_weights(weights) y_np_2 = model.predict([x_np, c_np]) assert_allclose(y_np, y_np_2, atol=1e-4) # test flat list inputs with CustomObjectScope(custom_objects): layer = wrappers.Bidirectional.from_config(copy.deepcopy(config)) y = layer([x, c]) model = Model([x, c], y) model.set_weights(weights) y_np_3 = model.predict([x_np, c_np]) assert_allclose(y_np, y_np_3, atol=1e-4) def test_Bidirectional_with_constants_layer_passing_initial_state(): class RNNCellWithConstants(Layer): def __init__(self, units, **kwargs): self.units = units self.state_size = units super(RNNCellWithConstants, self).__init__(**kwargs) def build(self, input_shape): if not isinstance(input_shape, list): raise TypeError('expects constants shape') [input_shape, constant_shape] = input_shape # will (and should) raise if more than one constant passed self.input_kernel = self.add_weight( shape=(input_shape[-1], self.units), initializer='uniform', name='kernel') self.recurrent_kernel = self.add_weight( shape=(self.units, self.units), initializer='uniform', name='recurrent_kernel') self.constant_kernel = self.add_weight( shape=(constant_shape[-1], self.units), initializer='uniform', name='constant_kernel') self.built = True def call(self, inputs, states, constants): [prev_output] = states [constant] = constants h_input = K.dot(inputs, self.input_kernel) h_state = K.dot(prev_output, self.recurrent_kernel) h_const = K.dot(constant, self.constant_kernel) output = h_input + h_state + h_const return output, [output] def get_config(self): config = {'units': self.units} base_config = super(RNNCellWithConstants, self).get_config() return dict(list(base_config.items()) + list(config.items())) # 
Test basic case. x = Input((5, 5)) c = Input((3,)) s_for = Input((32,)) s_bac = Input((32,)) cell = RNNCellWithConstants(32) custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} with CustomObjectScope(custom_objects): layer = wrappers.Bidirectional(RNN(cell)) y = layer(x, initial_state=[s_for, s_bac], constants=c) model = Model([x, s_for, s_bac, c], y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 32)), np.zeros((6, 3))], np.zeros((6, 64)) ) # Test basic case serialization. x_np = np.random.random((6, 5, 5)) s_fw_np = np.random.random((6, 32)) s_bk_np = np.random.random((6, 32)) c_np = np.random.random((6, 3)) y_np = model.predict([x_np, s_fw_np, s_bk_np, c_np]) weights = model.get_weights() config = layer.get_config() with CustomObjectScope(custom_objects): layer = wrappers.Bidirectional.from_config(copy.deepcopy(config)) y = layer(x, initial_state=[s_for, s_bac], constants=c) model = Model([x, s_for, s_bac, c], y) model.set_weights(weights) y_np_2 = model.predict([x_np, s_fw_np, s_bk_np, c_np]) assert_allclose(y_np, y_np_2, atol=1e-4) # verify that state is used y_np_2_different_s = model.predict([x_np, s_fw_np + 10., s_bk_np + 10., c_np]) with pytest.raises(AssertionError): assert_allclose(y_np, y_np_2_different_s, atol=1e-4) # test flat list inputs with CustomObjectScope(custom_objects): layer = wrappers.Bidirectional.from_config(copy.deepcopy(config)) y = layer([x, s_for, s_bac, c]) model = Model([x, s_for, s_bac, c], y) model.set_weights(weights) y_np_3 = model.predict([x_np, s_fw_np, s_bk_np, c_np]) assert_allclose(y_np, y_np_3, atol=1e-4) def test_Bidirectional_trainable(): # test layers that need learning_phase to be set x = Input(shape=(3, 2)) layer = wrappers.Bidirectional(layers.SimpleRNN(3)) _ = layer(x) assert len(layer.trainable_weights) == 6 layer.trainable = False assert len(layer.trainable_weights) == 0 layer.trainable = True assert len(layer.trainable_weights) 
== 6 def test_Bidirectional_updates(): x = Input(shape=(3, 2)) layer = wrappers.Bidirectional(layers.SimpleRNN(3)) assert len(layer.updates) == 0 assert len(layer.get_updates_for(None)) == 0 assert len(layer.get_updates_for(x)) == 0 layer.forward_layer.add_update(0, inputs=x) layer.forward_layer.add_update(1, inputs=None) layer.backward_layer.add_update(0, inputs=x) layer.backward_layer.add_update(1, inputs=None) assert len(layer.updates) == 4 assert len(layer.get_updates_for(None)) == 2 assert len(layer.get_updates_for(x)) == 2 def test_Bidirectional_losses(): x = Input(shape=(3, 2)) layer = wrappers.Bidirectional( layers.SimpleRNN(3, kernel_regularizer='l1', bias_regularizer='l1')) _ = layer(x) assert len(layer.losses) == 4 assert len(layer.get_losses_for(None)) == 4 assert len(layer.get_losses_for(x)) == 0 layer.forward_layer.add_loss(0, inputs=x) layer.forward_layer.add_loss(1, inputs=None) layer.backward_layer.add_loss(0, inputs=x) layer.backward_layer.add_loss(1, inputs=None) assert len(layer.losses) == 8 assert len(layer.get_losses_for(None)) == 6 assert len(layer.get_losses_for(x)) == 2 if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/core_test.py0000644000000000116100000002712013354530144020434 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose from keras import backend as K from keras import layers from keras.models import Model from keras.models import Sequential from keras.utils.test_utils import layer_test from keras import regularizers from keras import constraints from keras.layers import deserialize as deserialize_layer def test_masking(): layer_test(layers.Masking, kwargs={}, input_shape=(3, 2, 3)) def test_dropout(): layer_test(layers.Dropout, kwargs={'rate': 0.5}, input_shape=(3, 2)) layer_test(layers.Dropout, kwargs={'rate': 0.5, 'noise_shape': [3, 1]}, input_shape=(3, 2)) layer_test(layers.Dropout, kwargs={'rate': 0.5, 'noise_shape': [None, 1]}, input_shape=(3, 
2)) layer_test(layers.SpatialDropout1D, kwargs={'rate': 0.5}, input_shape=(2, 3, 4)) for data_format in ['channels_last', 'channels_first']: for shape in [(4, 5), (4, 5, 6)]: if data_format == 'channels_last': input_shape = (2,) + shape + (3,) else: input_shape = (2, 3) + shape if len(shape) == 2: layer = layers.SpatialDropout2D else: layer = layers.SpatialDropout3D layer_test(layer, kwargs={'rate': 0.5, 'data_format': data_format}, input_shape=input_shape) # Test invalid use cases with pytest.raises(ValueError): layer_test(layer, kwargs={'rate': 0.5, 'data_format': 'channels_middle'}, input_shape=input_shape) def test_activation(): # with string argument layer_test(layers.Activation, kwargs={'activation': 'relu'}, input_shape=(3, 2)) # with function argument layer_test(layers.Activation, kwargs={'activation': K.relu}, input_shape=(3, 2)) def test_reshape(): layer_test(layers.Reshape, kwargs={'target_shape': (8, 1)}, input_shape=(3, 2, 4)) layer_test(layers.Reshape, kwargs={'target_shape': (-1, 1)}, input_shape=(3, 2, 4)) layer_test(layers.Reshape, kwargs={'target_shape': (1, -1)}, input_shape=(3, 2, 4)) layer_test(layers.Reshape, kwargs={'target_shape': (-1, 1)}, input_shape=(None, None, 4)) def test_permute(): layer_test(layers.Permute, kwargs={'dims': (2, 1)}, input_shape=(3, 2, 4)) def test_flatten(): def test_4d(): np_inp_channels_last = np.arange(24, dtype='float32').reshape( (1, 4, 3, 2)) np_output_cl = layer_test(layers.Flatten, kwargs={'data_format': 'channels_last'}, input_data=np_inp_channels_last) np_inp_channels_first = np.transpose(np_inp_channels_last, [0, 3, 1, 2]) np_output_cf = layer_test(layers.Flatten, kwargs={'data_format': 'channels_first'}, input_data=np_inp_channels_first, expected_output=np_output_cl) def test_3d(): np_inp_channels_last = np.arange(12, dtype='float32').reshape( (1, 4, 3)) np_output_cl = layer_test(layers.Flatten, kwargs={'data_format': 'channels_last'}, input_data=np_inp_channels_last) np_inp_channels_first = 
np.transpose(np_inp_channels_last, [0, 2, 1]) np_output_cf = layer_test(layers.Flatten, kwargs={'data_format': 'channels_first'}, input_data=np_inp_channels_first, expected_output=np_output_cl) def test_5d(): np_inp_channels_last = np.arange(120, dtype='float32').reshape( (1, 5, 4, 3, 2)) np_output_cl = layer_test(layers.Flatten, kwargs={'data_format': 'channels_last'}, input_data=np_inp_channels_last) np_inp_channels_first = np.transpose(np_inp_channels_last, [0, 4, 1, 2, 3]) np_output_cf = layer_test(layers.Flatten, kwargs={'data_format': 'channels_first'}, input_data=np_inp_channels_first, expected_output=np_output_cl) test_3d() test_4d() test_5d() def test_repeat_vector(): layer_test(layers.RepeatVector, kwargs={'n': 3}, input_shape=(3, 2)) def test_lambda(): layer_test(layers.Lambda, kwargs={'function': lambda x: x + 1}, input_shape=(3, 2)) layer_test(layers.Lambda, kwargs={'function': lambda x, a, b: x * a + b, 'arguments': {'a': 0.6, 'b': 0.4}}, input_shape=(3, 2)) def antirectifier(x): x -= K.mean(x, axis=1, keepdims=True) x = K.l2_normalize(x, axis=1) pos = K.relu(x) neg = K.relu(-x) return K.concatenate([pos, neg], axis=1) def antirectifier_output_shape(input_shape): shape = list(input_shape) assert len(shape) == 2 # only valid for 2D tensors shape[-1] *= 2 return tuple(shape) layer_test(layers.Lambda, kwargs={'function': antirectifier, 'output_shape': antirectifier_output_shape}, input_shape=(3, 2)) # test layer with multiple outputs def test_multiple_outputs(): def func(x): return [x * 0.2, x * 0.3] def output_shape(input_shape): return [input_shape, input_shape] def mask(inputs, mask=None): return [None, None] i = layers.Input(shape=(3, 2, 1)) o = layers.Lambda(function=func, output_shape=output_shape, mask=mask)(i) o1, o2 = o assert o1._keras_shape == (None, 3, 2, 1) assert o2._keras_shape == (None, 3, 2, 1) model = Model(i, o) x = np.random.random((4, 3, 2, 1)) out1, out2 = model.predict(x) assert out1.shape == (4, 3, 2, 1) assert out2.shape == (4, 
3, 2, 1) assert_allclose(out1, x * 0.2, atol=1e-4) assert_allclose(out2, x * 0.3, atol=1e-4) test_multiple_outputs() # test layer with multiple outputs and no # explicit mask def test_multiple_outputs_no_mask(): def func(x): return [x * 0.2, x * 0.3] def output_shape(input_shape): return [input_shape, input_shape] i = layers.Input(shape=(3, 2, 1)) o = layers.Lambda(function=func, output_shape=output_shape)(i) assert o[0]._keras_shape == (None, 3, 2, 1) assert o[1]._keras_shape == (None, 3, 2, 1) o = layers.add(o) model = Model(i, o) i2 = layers.Input(shape=(3, 2, 1)) o2 = model(i2) model2 = Model(i2, o2) x = np.random.random((4, 3, 2, 1)) out = model2.predict(x) assert out.shape == (4, 3, 2, 1) assert_allclose(out, x * 0.2 + x * 0.3, atol=1e-4) test_multiple_outputs_no_mask() # test serialization with function def f(x): return x + 1 ld = layers.Lambda(f) config = ld.get_config() ld = deserialize_layer({'class_name': 'Lambda', 'config': config}) # test with lambda ld = layers.Lambda( lambda x: K.concatenate([K.square(x), x]), output_shape=lambda s: tuple(list(s)[:-1] + [2 * s[-1]])) config = ld.get_config() ld = layers.Lambda.from_config(config) # test serialization with output_shape function def f(x): return K.concatenate([K.square(x), x]) def f_shape(s): return tuple(list(s)[:-1] + [2 * s[-1]]) ld = layers.Lambda(f, output_shape=f_shape) config = ld.get_config() ld = deserialize_layer({'class_name': 'Lambda', 'config': config}) @pytest.mark.skipif((K.backend() == 'theano'), reason="theano cannot compute " "the output shape automatically.") def test_lambda_output_shape(): layer_test(layers.Lambda, kwargs={'function': lambda x: K.mean(x, axis=-1)}, input_shape=(3, 2, 4)) def test_dense(): layer_test(layers.Dense, kwargs={'units': 3}, input_shape=(3, 2)) layer_test(layers.Dense, kwargs={'units': 3}, input_shape=(3, 4, 2)) layer_test(layers.Dense, kwargs={'units': 3}, input_shape=(None, None, 2)) layer_test(layers.Dense, kwargs={'units': 3}, input_shape=(3, 4, 5, 2)) 
layer_test(layers.Dense, kwargs={'units': 3, 'kernel_regularizer': regularizers.l2(0.01), 'bias_regularizer': regularizers.l1(0.01), 'activity_regularizer': regularizers.L1L2(l1=0.01, l2=0.01), 'kernel_constraint': constraints.MaxNorm(1), 'bias_constraint': constraints.max_norm(1)}, input_shape=(3, 2)) layer = layers.Dense(3, kernel_regularizer=regularizers.l1(0.01), bias_regularizer='l1') layer.build((None, 4)) assert len(layer.losses) == 2 def test_activity_regularization(): layer = layers.ActivityRegularization(l1=0.01, l2=0.01) # test in functional API x = layers.Input(shape=(3,)) z = layers.Dense(2)(x) y = layer(z) model = Model(x, y) model.compile('rmsprop', 'mse') model.predict(np.random.random((2, 3))) # test serialization model_config = model.get_config() model = Model.from_config(model_config) model.compile('rmsprop', 'mse') def test_sequential_as_downstream_of_masking_layer(): inputs = layers.Input(shape=(3, 4)) x = layers.Masking(mask_value=0., input_shape=(3, 4))(inputs) s = Sequential() s.add(layers.Dense(5, input_shape=(4,))) s.add(layers.Activation('relu')) x = layers.wrappers.TimeDistributed(s)(x) model = Model(inputs=inputs, outputs=x) model.compile(optimizer='rmsprop', loss='mse') model_input = np.random.randint(low=1, high=5, size=(10, 3, 4)) for i in range(4): model_input[i, i:, :] = 0. 
model.fit(model_input, np.random.random((10, 3, 5)), epochs=1, batch_size=6) mask_outputs = [model.layers[1].compute_mask(model.layers[1].input)] mask_outputs += [model.layers[2].compute_mask(model.layers[2].input, mask_outputs[-1])] func = K.function([model.input], mask_outputs) mask_outputs_val = func([model_input]) assert np.array_equal(mask_outputs_val[0], np.any(model_input, axis=-1)) assert np.array_equal(mask_outputs_val[1], np.any(model_input, axis=-1)) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/pooling_test.py0000644000000000116100000001164313354530144021156 0ustar rooteng00000000000000import numpy as np import pytest from keras.utils.test_utils import layer_test from keras.layers import pooling from keras.layers import Masking from keras.layers import convolutional from keras.models import Sequential @pytest.mark.parametrize( 'padding,stride,data_format', [(padding, stride, data_format) for padding in ['valid', 'same'] for stride in [1, 2] for data_format in ['channels_first', 'channels_last']] ) def test_maxpooling_1d(padding, stride, data_format): layer_test(convolutional.MaxPooling1D, kwargs={'strides': stride, 'padding': padding, 'data_format': data_format}, input_shape=(3, 5, 4)) @pytest.mark.parametrize( 'strides', [(1, 1), (2, 3)] ) def test_maxpooling_2d(strides): pool_size = (3, 3) layer_test(convolutional.MaxPooling2D, kwargs={'strides': strides, 'padding': 'valid', 'pool_size': pool_size}, input_shape=(3, 5, 6, 4)) @pytest.mark.parametrize( 'strides,data_format,input_shape', [(2, None, (3, 11, 12, 10, 4)), (3, 'channels_first', (3, 4, 11, 12, 10))] ) def test_maxpooling_3d(strides, data_format, input_shape): pool_size = (3, 3, 3) layer_test(convolutional.MaxPooling3D, kwargs={'strides': strides, 'padding': 'valid', 'data_format': data_format, 'pool_size': pool_size}, input_shape=input_shape) @pytest.mark.parametrize( 'padding,stride,data_format', [(padding, stride, data_format) for padding in ['valid', 
'same'] for stride in [1, 2] for data_format in ['channels_first', 'channels_last']] ) def test_averagepooling_1d(padding, stride, data_format): layer_test(convolutional.AveragePooling1D, kwargs={'strides': stride, 'padding': padding, 'data_format': data_format}, input_shape=(3, 5, 4)) @pytest.mark.parametrize( 'strides,padding,data_format,input_shape', [((2, 2), 'same', None, (3, 5, 6, 4)), ((2, 2), 'valid', None, (3, 5, 6, 4)), ((1, 1), 'valid', 'channels_first', (3, 4, 5, 6))] ) def test_averagepooling_2d(strides, padding, data_format, input_shape): layer_test(convolutional.AveragePooling2D, kwargs={'strides': strides, 'padding': padding, 'pool_size': (2, 2), 'data_format': data_format}, input_shape=input_shape) @pytest.mark.parametrize( 'strides,data_format,input_shape', [(2, None, (3, 11, 12, 10, 4)), (3, 'channels_first', (3, 4, 11, 12, 10))] ) def test_averagepooling_3d(strides, data_format, input_shape): pool_size = (3, 3, 3) layer_test(convolutional.AveragePooling3D, kwargs={'strides': strides, 'padding': 'valid', 'data_format': data_format, 'pool_size': pool_size}, input_shape=input_shape) @pytest.mark.parametrize( 'data_format,pooling_class', [(data_format, pooling_class) for data_format in ['channels_first', 'channels_last'] for pooling_class in [pooling.GlobalMaxPooling1D, pooling.GlobalAveragePooling1D]] ) def test_globalpooling_1d(data_format, pooling_class): layer_test(pooling_class, kwargs={'data_format': data_format}, input_shape=(3, 4, 5)) def test_globalpooling_1d_supports_masking(): # Test GlobalAveragePooling1D supports masking model = Sequential() model.add(Masking(mask_value=0., input_shape=(3, 4))) model.add(pooling.GlobalAveragePooling1D()) model.compile(loss='mae', optimizer='adam') model_input = np.random.randint(low=1, high=5, size=(2, 3, 4)) model_input[0, 1:, :] = 0 output = model.predict(model_input) assert np.array_equal(output[0], model_input[0, 0, :]) @pytest.mark.parametrize( 'data_format,pooling_class', [(data_format, 
pooling_class) for data_format in ['channels_first', 'channels_last'] for pooling_class in [pooling.GlobalMaxPooling2D, pooling.GlobalAveragePooling2D]] ) def test_globalpooling_2d(data_format, pooling_class): layer_test(pooling_class, kwargs={'data_format': data_format}, input_shape=(3, 4, 5, 6)) @pytest.mark.parametrize( 'data_format,pooling_class', [(data_format, pooling_class) for data_format in ['channels_first', 'channels_last'] for pooling_class in [pooling.GlobalMaxPooling3D, pooling.GlobalAveragePooling3D]] ) def test_globalpooling_3d(data_format, pooling_class): layer_test(pooling_class, kwargs={'data_format': data_format}, input_shape=(3, 4, 3, 4, 3)) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/cudnn_recurrent_test.py0000644000000000116100000003143413354530144022707 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose import keras import keras.backend as K from keras.utils.test_utils import layer_test import time skipif_no_tf_gpu = pytest.mark.skipif( (K.backend() != 'tensorflow' or not K.tensorflow_backend._get_available_gpus()), reason='Requires TensorFlow backend and a GPU') @skipif_no_tf_gpu def test_cudnn_rnn_canonical_to_params_lstm(): units = 1 input_size = 1 layer = keras.layers.CuDNNLSTM(units) layer.build((None, None, input_size)) params = layer._canonical_to_params( weights=[ layer.kernel_i, layer.kernel_f, layer.kernel_c, layer.kernel_o, layer.recurrent_kernel_i, layer.recurrent_kernel_f, layer.recurrent_kernel_c, layer.recurrent_kernel_o, ], biases=[ layer.bias_i_i, layer.bias_f_i, layer.bias_c_i, layer.bias_o_i, layer.bias_i, layer.bias_f, layer.bias_c, layer.bias_o, ], ) ref_params = layer._cudnn_lstm.canonical_to_params( weights=[ layer.kernel_i, layer.kernel_f, layer.kernel_c, layer.kernel_o, layer.recurrent_kernel_i, layer.recurrent_kernel_f, layer.recurrent_kernel_c, layer.recurrent_kernel_o, ], biases=[ layer.bias_i_i, layer.bias_f_i, 
layer.bias_c_i, layer.bias_o_i, layer.bias_i, layer.bias_f, layer.bias_c, layer.bias_o, ], ) ref_params_value = keras.backend.get_value(ref_params) params_value = keras.backend.get_value(params) diff = np.mean(ref_params_value - params_value) assert diff < 1e-8 @skipif_no_tf_gpu def test_cudnn_rnn_canonical_to_params_gru(): units = 7 input_size = 9 layer = keras.layers.CuDNNGRU(units) layer.build((None, None, input_size)) ref_params = layer._cudnn_gru.canonical_to_params( weights=[ layer.kernel_r, layer.kernel_z, layer.kernel_h, layer.recurrent_kernel_r, layer.recurrent_kernel_z, layer.recurrent_kernel_h, ], biases=[ layer.bias_r_i, layer.bias_z_i, layer.bias_h_i, layer.bias_r, layer.bias_z, layer.bias_h, ], ) params = layer._canonical_to_params( weights=[ layer.kernel_r, layer.kernel_z, layer.kernel_h, layer.recurrent_kernel_r, layer.recurrent_kernel_z, layer.recurrent_kernel_h, ], biases=[ layer.bias_r_i, layer.bias_z_i, layer.bias_h_i, layer.bias_r, layer.bias_z, layer.bias_h, ], ) ref_params_value = keras.backend.get_value(ref_params) params_value = keras.backend.get_value(params) diff = np.mean(ref_params_value - params_value) assert diff < 1e-8 @pytest.mark.parametrize('rnn_type', ['lstm', 'gru'], ids=['LSTM', 'GRU']) @skipif_no_tf_gpu def test_cudnn_rnn_timing(rnn_type): input_size = 1000 timesteps = 60 units = 256 num_samples = 10000 times = [] for use_cudnn in [True, False]: start_time = time.time() inputs = keras.layers.Input(shape=(None, input_size)) if use_cudnn: if rnn_type == 'lstm': layer = keras.layers.CuDNNLSTM(units) else: layer = keras.layers.CuDNNGRU(units) else: if rnn_type == 'lstm': layer = keras.layers.LSTM(units) else: layer = keras.layers.GRU(units) outputs = layer(inputs) model = keras.models.Model(inputs, outputs) model.compile('sgd', 'mse') x = np.random.random((num_samples, timesteps, input_size)) y = np.random.random((num_samples, units)) model.fit(x, y, epochs=4, batch_size=32) times.append(time.time() - start_time) speedup = 
times[1] / times[0] print(rnn_type, 'speedup', speedup) assert speedup > 3 @skipif_no_tf_gpu def test_cudnn_rnn_basics(): input_size = 10 timesteps = 6 units = 2 num_samples = 32 for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: for return_sequences in [True, False]: with keras.utils.CustomObjectScope( {'keras.layers.CuDNNGRU': keras.layers.CuDNNGRU, 'keras.layers.CuDNNLSTM': keras.layers.CuDNNLSTM}): layer_test( layer_class, kwargs={'units': units, 'return_sequences': return_sequences}, input_shape=(num_samples, timesteps, input_size)) for go_backwards in [True, False]: with keras.utils.CustomObjectScope( {'keras.layers.CuDNNGRU': keras.layers.CuDNNGRU, 'keras.layers.CuDNNLSTM': keras.layers.CuDNNLSTM}): layer_test( layer_class, kwargs={'units': units, 'go_backwards': go_backwards}, input_shape=(num_samples, timesteps, input_size)) @skipif_no_tf_gpu def test_trainability(): input_size = 10 units = 2 for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: layer = layer_class(units) layer.build((None, None, input_size)) assert len(layer.weights) == 3 assert len(layer.trainable_weights) == 3 assert len(layer.non_trainable_weights) == 0 layer.trainable = False assert len(layer.weights) == 3 assert len(layer.non_trainable_weights) == 3 assert len(layer.trainable_weights) == 0 layer.trainable = True assert len(layer.weights) == 3 assert len(layer.trainable_weights) == 3 assert len(layer.non_trainable_weights) == 0 @skipif_no_tf_gpu def test_regularizer(): input_size = 10 timesteps = 6 units = 2 num_samples = 32 for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: layer = layer_class(units, return_sequences=False, input_shape=(timesteps, input_size), kernel_regularizer=keras.regularizers.l1(0.01), recurrent_regularizer=keras.regularizers.l1(0.01), bias_regularizer='l2') layer.build((None, None, input_size)) assert len(layer.losses) == 3 layer = layer_class(units, return_sequences=False, input_shape=(timesteps, input_size), 
activity_regularizer='l2') assert layer.activity_regularizer x = keras.backend.variable(np.ones((num_samples, timesteps, input_size))) layer(x) assert len(layer.get_losses_for(x)) == 1 @skipif_no_tf_gpu def test_return_state(): input_size = 10 timesteps = 6 units = 2 num_samples = 32 for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1 inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size)) layer = layer_class(units, return_state=True, stateful=True) outputs = layer(inputs) output, state = outputs[0], outputs[1:] assert len(state) == num_states model = keras.models.Model(inputs, state[0]) inputs = np.random.random((num_samples, timesteps, input_size)) state = model.predict(inputs) np.testing.assert_allclose( keras.backend.eval(layer.states[0]), state, atol=1e-4) @skipif_no_tf_gpu def test_specify_initial_state_keras_tensor(): input_size = 10 timesteps = 6 units = 2 num_samples = 32 for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1 inputs = keras.Input((timesteps, input_size)) initial_state = [keras.Input((units,)) for _ in range(num_states)] layer = layer_class(units) if len(initial_state) == 1: output = layer(inputs, initial_state=initial_state[0]) else: output = layer(inputs, initial_state=initial_state) assert initial_state[0] in layer._inbound_nodes[0].input_tensors model = keras.models.Model([inputs] + initial_state, output) model.compile(loss='categorical_crossentropy', optimizer='adam') inputs = np.random.random((num_samples, timesteps, input_size)) initial_state = [np.random.random((num_samples, units)) for _ in range(num_states)] targets = np.random.random((num_samples, units)) model.fit([inputs] + initial_state, targets) @skipif_no_tf_gpu def test_statefulness(): input_size = 10 timesteps = 6 units = 2 num_samples = 32 for layer_class in [keras.layers.CuDNNGRU, 
keras.layers.CuDNNLSTM]: model = keras.models.Sequential() model.add(keras.layers.Embedding(10, input_size, input_length=timesteps, batch_input_shape=(num_samples, timesteps))) layer = layer_class(units, return_sequences=False, stateful=True, weights=None) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones((num_samples, timesteps))) assert(out1.shape == (num_samples, units)) # train once so that the states change model.train_on_batch(np.ones((num_samples, timesteps)), np.ones((num_samples, units))) out2 = model.predict(np.ones((num_samples, timesteps))) # if the state is not reset, output should be different assert(out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones((num_samples, timesteps))) assert(out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones((num_samples, timesteps))) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones((num_samples, timesteps))) assert(out4.max() != out5.max()) @skipif_no_tf_gpu def test_cudnnrnn_bidirectional(): rnn = keras.layers.CuDNNGRU samples = 2 dim = 2 timesteps = 2 output_dim = 2 mode = 'concat' x = np.random.random((samples, timesteps, dim)) target_dim = 2 * output_dim if mode == 'concat' else output_dim y = np.random.random((samples, target_dim)) # test with Sequential model model = keras.Sequential() model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode, input_shape=(None, dim))) model.compile(loss='mse', optimizer='sgd') model.fit(x, y, epochs=1, batch_size=1) # test config model.get_config() model = keras.models.model_from_json(model.to_json()) model.summary() # test stacked bidirectional layers model = keras.Sequential() model.add(keras.layers.Bidirectional(rnn(output_dim, return_sequences=True), 
merge_mode=mode, input_shape=(None, dim))) model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode)) model.compile(loss='mse', optimizer='sgd') model.fit(x, y, epochs=1, batch_size=1) # test with functional API inputs = keras.Input((timesteps, dim)) outputs = keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode)(inputs) model = keras.Model(inputs, outputs) model.compile(loss='mse', optimizer='sgd') model.fit(x, y, epochs=1, batch_size=1) # Bidirectional and stateful inputs = keras.Input(batch_shape=(1, timesteps, dim)) outputs = keras.layers.Bidirectional(rnn(output_dim, stateful=True), merge_mode=mode)(inputs) model = keras.Model(inputs, outputs) model.compile(loss='mse', optimizer='sgd') model.fit(x, y, epochs=1, batch_size=1) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/convolutional_test.py0000644000000000116100000012170513354530144022404 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose from keras.utils.test_utils import layer_test from keras import backend as K from keras.layers import convolutional from keras.models import Sequential # TensorFlow does not support full convolution. 
if K.backend() == 'theano': _convolution_paddings = ['valid', 'same', 'full'] else: _convolution_paddings = ['valid', 'same'] @pytest.mark.skipif((K.backend() == 'cntk' and K.dev.type() == 0), reason='cntk only support dilated conv on GPU') @pytest.mark.parametrize( 'layer_kwargs,input_length,expected_output', [ # Causal ({'filters': 1, 'kernel_size': 2, 'dilation_rate': 1, 'padding': 'causal', 'kernel_initializer': 'ones', 'use_bias': False}, 4, [[[0], [1], [3], [5]]]), # Non-causal ({'filters': 1, 'kernel_size': 2, 'dilation_rate': 1, 'padding': 'valid', 'kernel_initializer': 'ones', 'use_bias': False}, 4, [[[1], [3], [5]]]), # Causal dilated with larger kernel size ({'filters': 1, 'kernel_size': 3, 'dilation_rate': 2, 'padding': 'causal', 'kernel_initializer': 'ones', 'use_bias': False}, 10, np.float32([[[0], [1], [2], [4], [6], [9], [12], [15], [18], [21]]])), ] ) def test_causal_dilated_conv(layer_kwargs, input_length, expected_output): input_data = np.reshape(np.arange(input_length, dtype='float32'), (1, input_length, 1)) layer_test(convolutional.Conv1D, input_data=input_data, kwargs=layer_kwargs, expected_output=expected_output) @pytest.mark.parametrize( 'padding,strides', [(padding, strides) for padding in _convolution_paddings for strides in [1, 2] if not (padding == 'same' and strides != 1)] ) def test_conv_1d(padding, strides): batch_size = 2 steps = 8 input_dim = 2 kernel_size = 3 filters = 3 layer_test(convolutional.Conv1D, kwargs={'filters': filters, 'kernel_size': kernel_size, 'padding': padding, 'strides': strides}, input_shape=(batch_size, steps, input_dim)) layer_test(convolutional.Conv1D, kwargs={'filters': filters, 'kernel_size': kernel_size, 'padding': padding, 'kernel_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'kernel_constraint': 'max_norm', 'bias_constraint': 'max_norm', 'strides': strides}, input_shape=(batch_size, steps, input_dim)) @pytest.mark.skipif((K.backend() == 'cntk' and K.dev.type() == 0), 
reason='cntk only support dilated conv on GPU') def test_conv_1d_dilation(): batch_size = 2 steps = 8 input_dim = 2 kernel_size = 3 filters = 3 padding = _convolution_paddings[-1] layer_test(convolutional.Conv1D, kwargs={'filters': filters, 'kernel_size': kernel_size, 'padding': padding, 'dilation_rate': 2}, input_shape=(batch_size, steps, input_dim)) def test_conv_1d_channels_first(): batch_size = 2 steps = 8 input_dim = 2 kernel_size = 3 filters = 3 layer_test(convolutional.Conv1D, kwargs={'filters': filters, 'kernel_size': kernel_size, 'data_format': 'channels_first'}, input_shape=(batch_size, input_dim, steps)) @pytest.mark.parametrize( 'strides,padding', [(strides, padding) for padding in _convolution_paddings for strides in [(1, 1), (2, 2)] if not (padding == 'same' and strides != (1, 1))] ) def test_convolution_2d(strides, padding): num_samples = 2 filters = 2 stack_size = 3 kernel_size = (3, 2) num_row = 7 num_col = 6 layer_test(convolutional.Conv2D, kwargs={'filters': filters, 'kernel_size': kernel_size, 'padding': padding, 'strides': strides, 'data_format': 'channels_first'}, input_shape=(num_samples, stack_size, num_row, num_col)) def test_convolution_2d_channels_last(): num_samples = 2 filters = 2 stack_size = 3 num_row = 7 num_col = 6 padding = 'valid' strides = (2, 2) layer_test(convolutional.Conv2D, kwargs={'filters': filters, 'kernel_size': 3, 'padding': padding, 'data_format': 'channels_last', 'activation': None, 'kernel_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'kernel_constraint': 'max_norm', 'bias_constraint': 'max_norm', 'strides': strides}, input_shape=(num_samples, num_row, num_col, stack_size)) @pytest.mark.skipif((K.backend() == 'cntk' and K.dev.type() == 0), reason='cntk only supports dilated conv on GPU') def test_convolution_2d_dilation(): num_samples = 2 filters = 2 stack_size = 3 kernel_size = (3, 2) num_row = 7 num_col = 6 padding = 'valid' layer_test(convolutional.Conv2D, kwargs={'filters': filters, 
'kernel_size': kernel_size, 'padding': padding, 'dilation_rate': (2, 2)}, input_shape=(num_samples, num_row, num_col, stack_size)) def test_convolution_2d_invalid(): filters = 2 padding = _convolution_paddings[-1] kernel_size = (3, 2) with pytest.raises(ValueError): model = Sequential([convolutional.Conv2D( filters=filters, kernel_size=kernel_size, padding=padding, batch_input_shape=(None, None, 5, None))]) @pytest.mark.parametrize( 'padding,out_padding,strides', [(padding, out_padding, strides) for padding in _convolution_paddings for out_padding in [None, (0, 0), (1, 1)] for strides in [(1, 1), (2, 2)] if (not (padding == 'same' and strides != (1, 1)) and not(strides == (1, 1) and out_padding == (1, 1)))] ) def test_conv2d_transpose(padding, out_padding, strides): num_samples = 2 filters = 2 stack_size = 3 num_row = 5 num_col = 6 layer_test(convolutional.Conv2DTranspose, kwargs={'filters': filters, 'kernel_size': 3, 'padding': padding, 'output_padding': out_padding, 'strides': strides, 'data_format': 'channels_last'}, input_shape=(num_samples, num_row, num_col, stack_size), fixed_batch_size=True) @pytest.mark.skipif((K.backend() == 'cntk' and K.dev.type() == 0), reason='cntk only supports dilated conv transpose on GPU') def test_conv2d_transpose_dilation(): layer_test(convolutional.Conv2DTranspose, kwargs={'filters': 2, 'kernel_size': 3, 'padding': 'same', 'data_format': 'channels_last', 'dilation_rate': (2, 2)}, input_shape=(2, 5, 6, 3)) # Check dilated conv transpose returns expected output input_data = np.arange(48).reshape((1, 4, 4, 3)).astype(np.float32) expected_output = np.float32([[192, 228, 192, 228], [336, 372, 336, 372], [192, 228, 192, 228], [336, 372, 336, 372]]).reshape((1, 4, 4, 1)) layer_test(convolutional.Conv2DTranspose, input_data=input_data, kwargs={'filters': 1, 'kernel_size': 3, 'padding': 'same', 'data_format': 'channels_last', 'dilation_rate': (2, 2), 'kernel_initializer': 'ones'}, expected_output=expected_output) def 
test_conv2d_transpose_channels_first(): num_samples = 2 filters = 2 stack_size = 3 num_row = 5 num_col = 6 padding = 'valid' strides = (2, 2) layer_test(convolutional.Conv2DTranspose, kwargs={'filters': filters, 'kernel_size': 3, 'padding': padding, 'data_format': 'channels_first', 'activation': None, 'kernel_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'kernel_constraint': 'max_norm', 'bias_constraint': 'max_norm', 'strides': strides}, input_shape=(num_samples, stack_size, num_row, num_col), fixed_batch_size=True) def test_conv2d_transpose_invalid(): filters = 2 stack_size = 3 num_row = 5 num_col = 6 padding = 'valid' with pytest.raises(ValueError): model = Sequential([convolutional.Conv2DTranspose( filters=filters, kernel_size=3, padding=padding, use_bias=True, batch_input_shape=(None, None, 5, None))]) # Test invalid output padding for given stride. Output padding equal to stride with pytest.raises(ValueError): model = Sequential([convolutional.Conv2DTranspose( filters=filters, kernel_size=3, padding=padding, output_padding=(0, 3), strides=(1, 3), batch_input_shape=(None, num_row, num_col, stack_size))]) # Output padding greater than stride with pytest.raises(ValueError): model = Sequential([convolutional.Conv2DTranspose( filters=filters, kernel_size=3, padding=padding, output_padding=(2, 2), strides=(1, 3), batch_input_shape=(None, num_row, num_col, stack_size))]) @pytest.mark.parametrize( 'padding,strides,multiplier,dilation_rate', [(padding, strides, multiplier, dilation_rate) for padding in _convolution_paddings for strides in [1, 2] for multiplier in [1, 2] for dilation_rate in [1, 2] if (not (padding == 'same' and strides != 1) and not (dilation_rate != 1 and strides != 1) and not (dilation_rate != 1 and K.backend() == 'cntk'))] ) def test_separable_conv_1d(padding, strides, multiplier, dilation_rate): num_samples = 2 filters = 6 stack_size = 3 num_step = 9 layer_test(convolutional.SeparableConv1D, kwargs={'filters': filters, 
'kernel_size': 3, 'padding': padding, 'strides': strides, 'depth_multiplier': multiplier, 'dilation_rate': dilation_rate}, input_shape=(num_samples, num_step, stack_size)) def test_separable_conv_1d_additional_args(): num_samples = 2 filters = 6 stack_size = 3 num_step = 9 padding = 'valid' multiplier = 2 layer_test(convolutional.SeparableConv1D, kwargs={'filters': filters, 'kernel_size': 3, 'padding': padding, 'data_format': 'channels_first', 'activation': None, 'depthwise_regularizer': 'l2', 'pointwise_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'pointwise_constraint': 'unit_norm', 'depthwise_constraint': 'unit_norm', 'strides': 1, 'use_bias': True, 'depth_multiplier': multiplier}, input_shape=(num_samples, stack_size, num_step)) def test_separable_conv_1d_invalid(): filters = 6 padding = 'valid' with pytest.raises(ValueError): model = Sequential([convolutional.SeparableConv1D( filters=filters, kernel_size=3, padding=padding, batch_input_shape=(None, 5, None))]) @pytest.mark.parametrize( 'padding,strides,multiplier,dilation_rate', [(padding, strides, multiplier, dilation_rate) for padding in _convolution_paddings for strides in [(1, 1), (2, 2)] for multiplier in [1, 2] for dilation_rate in [(1, 1), (2, 2), (2, 1), (1, 2)] if (not (padding == 'same' and strides != (1, 1)) and not (dilation_rate != (1, 1) and strides != (1, 1)) and not (dilation_rate != (1, 1) and multiplier == dilation_rate[0]) and not (dilation_rate != (1, 1) and K.backend() == 'cntk'))] ) def test_separable_conv_2d(padding, strides, multiplier, dilation_rate): num_samples = 2 filters = 6 stack_size = 3 num_row = 7 num_col = 6 layer_test( convolutional.SeparableConv2D, kwargs={'filters': filters, 'kernel_size': (3, 3), 'padding': padding, 'strides': strides, 'depth_multiplier': multiplier, 'dilation_rate': dilation_rate}, input_shape=(num_samples, num_row, num_col, stack_size)) def test_separable_conv_2d_additional_args(): num_samples = 2 filters = 6 stack_size = 3 
num_row = 7 num_col = 6 padding = 'valid' strides = (2, 2) multiplier = 2 layer_test(convolutional.SeparableConv2D, kwargs={'filters': filters, 'kernel_size': 3, 'padding': padding, 'data_format': 'channels_first', 'activation': None, 'depthwise_regularizer': 'l2', 'pointwise_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'pointwise_constraint': 'unit_norm', 'depthwise_constraint': 'unit_norm', 'strides': strides, 'depth_multiplier': multiplier}, input_shape=(num_samples, stack_size, num_row, num_col)) def test_separable_conv_2d_invalid(): filters = 6 padding = 'valid' with pytest.raises(ValueError): model = Sequential([convolutional.SeparableConv2D( filters=filters, kernel_size=3, padding=padding, batch_input_shape=(None, None, 5, None))]) @pytest.mark.parametrize( 'padding,strides,multiplier', [(padding, strides, multiplier) for padding in _convolution_paddings for strides in [(1, 1), (2, 2)] for multiplier in [1, 2] if not (padding == 'same' and strides != (1, 1))] ) def test_depthwise_conv_2d(padding, strides, multiplier): num_samples = 2 stack_size = 3 num_row = 7 num_col = 6 layer_test(convolutional.DepthwiseConv2D, kwargs={'kernel_size': (3, 3), 'padding': padding, 'strides': strides, 'depth_multiplier': multiplier}, input_shape=(num_samples, num_row, num_col, stack_size)) def test_depthwise_conv_2d_additional_args(): num_samples = 2 stack_size = 3 num_row = 7 num_col = 6 padding = 'valid' strides = (2, 2) multiplier = 2 layer_test(convolutional.DepthwiseConv2D, kwargs={'kernel_size': 3, 'padding': padding, 'data_format': 'channels_first', 'activation': None, 'depthwise_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'depthwise_constraint': 'unit_norm', 'use_bias': True, 'strides': strides, 'depth_multiplier': multiplier}, input_shape=(num_samples, stack_size, num_row, num_col)) def test_depthwise_conv_2d_invalid(): padding = 'valid' with pytest.raises(ValueError): 
Sequential([convolutional.DepthwiseConv2D( kernel_size=3, padding=padding, batch_input_shape=(None, None, 5, None))]) @pytest.mark.parametrize( 'padding,strides', [(padding, strides) for padding in _convolution_paddings for strides in [(1, 1, 1), (2, 2, 2)] if not (padding == 'same' and strides != (1, 1, 1))] ) def test_convolution_3d(padding, strides): num_samples = 2 filters = 2 stack_size = 3 input_len_dim1 = 9 input_len_dim2 = 8 input_len_dim3 = 8 layer_test(convolutional.Convolution3D, kwargs={'filters': filters, 'kernel_size': 3, 'padding': padding, 'strides': strides}, input_shape=(num_samples, input_len_dim1, input_len_dim2, input_len_dim3, stack_size)) def test_convolution_3d_additional_args(): num_samples = 2 filters = 2 stack_size = 3 padding = 'valid' strides = (2, 2, 2) input_len_dim1 = 9 input_len_dim2 = 8 input_len_dim3 = 8 layer_test(convolutional.Convolution3D, kwargs={'filters': filters, 'kernel_size': (1, 2, 3), 'padding': padding, 'activation': None, 'kernel_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'kernel_constraint': 'max_norm', 'bias_constraint': 'max_norm', 'strides': strides}, input_shape=(num_samples, input_len_dim1, input_len_dim2, input_len_dim3, stack_size)) @pytest.mark.parametrize( 'padding,out_padding,strides,data_format', [(padding, out_padding, strides, data_format) for padding in _convolution_paddings for out_padding in [None, (0, 0, 0), (1, 1, 1)] for strides in [(1, 1, 1), (2, 2, 2)] for data_format in ['channels_first', 'channels_last'] if (not (padding == 'same' and strides != (1, 1, 1)) and not (strides == (1, 1, 1) and out_padding == (1, 1, 1)))] ) def test_conv3d_transpose(padding, out_padding, strides, data_format): filters = 2 stack_size = 3 num_depth = 7 num_row = 5 num_col = 6 layer_test( convolutional.Conv3DTranspose, kwargs={'filters': filters, 'kernel_size': 3, 'padding': padding, 'output_padding': out_padding, 'strides': strides, 'data_format': data_format}, input_shape=(None, 
num_depth, num_row, num_col, stack_size), fixed_batch_size=True) def test_conv3d_transpose_additional_args(): filters = 2 stack_size = 3 num_depth = 7 num_row = 5 num_col = 6 padding = 'valid' strides = (2, 2, 2) layer_test(convolutional.Conv3DTranspose, kwargs={'filters': filters, 'kernel_size': 3, 'padding': padding, 'data_format': 'channels_first', 'activation': None, 'kernel_regularizer': 'l2', 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'kernel_constraint': 'max_norm', 'bias_constraint': 'max_norm', 'use_bias': True, 'strides': strides}, input_shape=(None, stack_size, num_depth, num_row, num_col), fixed_batch_size=True) def test_conv3d_transpose_invalid(): filters = 2 stack_size = 3 num_depth = 7 num_row = 5 num_col = 6 padding = 'valid' # Test invalid use case with pytest.raises(ValueError): model = Sequential([convolutional.Conv3DTranspose( filters=filters, kernel_size=3, padding=padding, batch_input_shape=(None, None, 5, None, None))]) # Test invalid output padding for given stride. 
Output padding equal # to stride with pytest.raises(ValueError): model = Sequential([convolutional.Conv3DTranspose( filters=filters, kernel_size=3, padding=padding, output_padding=(0, 3, 3), strides=(1, 3, 4), batch_input_shape=(None, num_depth, num_row, num_col, stack_size))]) # Output padding greater than stride with pytest.raises(ValueError): model = Sequential([convolutional.Conv3DTranspose( filters=filters, kernel_size=3, padding=padding, output_padding=(2, 2, 3), strides=(1, 3, 4), batch_input_shape=(None, num_depth, num_row, num_col, stack_size))]) def test_zero_padding_1d(): num_samples = 2 input_dim = 2 num_steps = 5 shape = (num_samples, num_steps, input_dim) inputs = np.ones(shape) # basic test layer_test(convolutional.ZeroPadding1D, kwargs={'padding': 2}, input_shape=inputs.shape) layer_test(convolutional.ZeroPadding1D, kwargs={'padding': (1, 2)}, input_shape=inputs.shape) # correctness test layer = convolutional.ZeroPadding1D(padding=2) layer.build(shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) for offset in [0, 1, -1, -2]: assert_allclose(np_output[:, offset, :], 0.) assert_allclose(np_output[:, 2:-2, :], 1.) layer = convolutional.ZeroPadding1D(padding=(1, 2)) layer.build(shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) for left_offset in [0]: assert_allclose(np_output[:, left_offset, :], 0.) for right_offset in [-1, -2]: assert_allclose(np_output[:, right_offset, :], 0.) assert_allclose(np_output[:, 1:-2, :], 1.) 
layer.get_config() @pytest.mark.parametrize( 'data_format,padding', [(data_format, padding) for data_format in ['channels_first', 'channels_last'] for padding in [(2, 2), ((1, 2), (3, 4))]] ) def test_zero_padding_2d(data_format, padding): num_samples = 2 stack_size = 2 input_num_row = 4 input_num_col = 5 if data_format == 'channels_last': inputs = np.ones((num_samples, input_num_row, input_num_col, stack_size)) else: inputs = np.ones((num_samples, stack_size, input_num_row, input_num_col)) layer_test(convolutional.ZeroPadding2D, kwargs={'padding': padding, 'data_format': data_format}, input_shape=inputs.shape) def test_zero_padding_2d_correctness(): num_samples = 2 stack_size = 2 input_num_row = 4 input_num_col = 5 inputs = np.ones((num_samples, stack_size, input_num_row, input_num_col)) for data_format in ['channels_first', 'channels_last']: layer = convolutional.ZeroPadding2D(padding=(2, 2), data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) if data_format == 'channels_last': for offset in [0, 1, -1, -2]: assert_allclose(np_output[:, offset, :, :], 0.) assert_allclose(np_output[:, :, offset, :], 0.) assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.) elif data_format == 'channels_first': for offset in [0, 1, -1, -2]: assert_allclose(np_output[:, :, offset, :], 0.) assert_allclose(np_output[:, :, :, offset], 0.) assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.) layer = convolutional.ZeroPadding2D(padding=((1, 2), (3, 4)), data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) if data_format == 'channels_last': for top_offset in [0]: assert_allclose(np_output[:, top_offset, :, :], 0.) for bottom_offset in [-1, -2]: assert_allclose(np_output[:, bottom_offset, :, :], 0.) for left_offset in [0, 1, 2]: assert_allclose(np_output[:, :, left_offset, :], 0.) for right_offset in [-1, -2, -3, -4]: assert_allclose(np_output[:, :, right_offset, :], 0.) 
assert_allclose(np_output[:, 1:-2, 3:-4, :], 1.) elif data_format == 'channels_first': for top_offset in [0]: assert_allclose(np_output[:, :, top_offset, :], 0.) for bottom_offset in [-1, -2]: assert_allclose(np_output[:, :, bottom_offset, :], 0.) for left_offset in [0, 1, 2]: assert_allclose(np_output[:, :, :, left_offset], 0.) for right_offset in [-1, -2, -3, -4]: assert_allclose(np_output[:, :, :, right_offset], 0.) assert_allclose(np_output[:, :, 1:-2, 3:-4], 1.) @pytest.mark.parametrize( 'data_format,padding', [(data_format, padding) for data_format in ['channels_first', 'channels_last'] for padding in [(2, 2, 2), ((1, 2), (3, 4), (0, 2))]] ) def test_zero_padding_3d(data_format, padding): num_samples = 2 stack_size = 2 input_len_dim1 = 4 input_len_dim2 = 5 input_len_dim3 = 3 inputs = np.ones((num_samples, input_len_dim1, input_len_dim2, input_len_dim3, stack_size)) layer_test(convolutional.ZeroPadding3D, kwargs={'padding': padding, 'data_format': data_format}, input_shape=inputs.shape) def test_zero_padding_3d_correctness(): num_samples = 2 stack_size = 2 input_len_dim1 = 4 input_len_dim2 = 5 input_len_dim3 = 3 inputs = np.ones((num_samples, input_len_dim1, input_len_dim2, input_len_dim3, stack_size)) for data_format in ['channels_first', 'channels_last']: layer = convolutional.ZeroPadding3D(padding=(2, 2, 2), data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) if data_format == 'channels_last': for offset in [0, 1, -1, -2]: assert_allclose(np_output[:, offset, :, :, :], 0.) assert_allclose(np_output[:, :, offset, :, :], 0.) assert_allclose(np_output[:, :, :, offset, :], 0.) assert_allclose(np_output[:, 2:-2, 2:-2, 2:-2, :], 1.) elif data_format == 'channels_first': for offset in [0, 1, -1, -2]: assert_allclose(np_output[:, :, offset, :, :], 0.) assert_allclose(np_output[:, :, :, offset, :], 0.) assert_allclose(np_output[:, :, :, :, offset], 0.) 
assert_allclose(np_output[:, :, 2:-2, 2:-2, 2:-2], 1.) layer = convolutional.ZeroPadding3D(padding=((1, 2), (3, 4), (0, 2)), data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) if data_format == 'channels_last': for dim1_offset in [0, -1, -2]: assert_allclose(np_output[:, dim1_offset, :, :, :], 0.) for dim2_offset in [0, 1, 2, -1, -2, -3, -4]: assert_allclose(np_output[:, :, dim2_offset, :, :], 0.) for dim3_offset in [-1, -2]: assert_allclose(np_output[:, :, :, dim3_offset, :], 0.) assert_allclose(np_output[:, 1:-2, 3:-4, 0:-2, :], 1.) elif data_format == 'channels_first': for dim1_offset in [0, -1, -2]: assert_allclose(np_output[:, :, dim1_offset, :, :], 0.) for dim2_offset in [0, 1, 2, -1, -2, -3, -4]: assert_allclose(np_output[:, :, :, dim2_offset, :], 0.) for dim3_offset in [-1, -2]: assert_allclose(np_output[:, :, :, :, dim3_offset], 0.) assert_allclose(np_output[:, :, 1:-2, 3:-4, 0:-2], 1.) def test_upsampling_1d(): layer_test(convolutional.UpSampling1D, kwargs={'size': 2}, input_shape=(3, 5, 4)) def test_upsampling_2d(): num_samples = 2 stack_size = 2 input_num_row = 11 input_num_col = 12 for data_format in ['channels_first', 'channels_last']: if data_format == 'channels_first': inputs = np.random.rand(num_samples, stack_size, input_num_row, input_num_col) else: # tf inputs = np.random.rand(num_samples, input_num_row, input_num_col, stack_size) # basic test layer_test(convolutional.UpSampling2D, kwargs={'size': (2, 2), 'data_format': data_format}, input_shape=inputs.shape) for length_row in [2]: for length_col in [2, 3]: layer = convolutional.UpSampling2D( size=(length_row, length_col), data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) if data_format == 'channels_first': assert np_output.shape[2] == length_row * input_num_row assert np_output.shape[3] == length_col * input_num_col else: # tf assert np_output.shape[1] == length_row * 
input_num_row assert np_output.shape[2] == length_col * input_num_col # compare with numpy if data_format == 'channels_first': expected_out = np.repeat(inputs, length_row, axis=2) expected_out = np.repeat(expected_out, length_col, axis=3) else: # tf expected_out = np.repeat(inputs, length_row, axis=1) expected_out = np.repeat(expected_out, length_col, axis=2) assert_allclose(np_output, expected_out) @pytest.mark.skipif((K.backend() == 'cntk'), reason='cntk does not support it yet') @pytest.mark.parametrize('data_format', ['channels_first', 'channels_last']) def test_upsampling_2d_bilinear(data_format): num_samples = 2 stack_size = 2 input_num_row = 11 input_num_col = 12 if data_format == 'channels_first': inputs = np.random.rand(num_samples, stack_size, input_num_row, input_num_col) else: # tf inputs = np.random.rand(num_samples, input_num_row, input_num_col, stack_size) # basic test layer_test(convolutional.UpSampling2D, kwargs={'size': (2, 2), 'data_format': data_format, 'interpolation': 'bilinear'}, input_shape=inputs.shape) for length_row in [2]: for length_col in [2, 3]: layer = convolutional.UpSampling2D( size=(length_row, length_col), data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) if data_format == 'channels_first': assert np_output.shape[2] == length_row * input_num_row assert np_output.shape[3] == length_col * input_num_col else: # tf assert np_output.shape[1] == length_row * input_num_row assert np_output.shape[2] == length_col * input_num_col @pytest.mark.skipif((K.backend() == 'cntk'), reason="cntk does not support it yet") def test_upsampling_3d(): num_samples = 2 stack_size = 2 input_len_dim1 = 10 input_len_dim2 = 11 input_len_dim3 = 12 for data_format in ['channels_first', 'channels_last']: if data_format == 'channels_first': inputs = np.random.rand(num_samples, stack_size, input_len_dim1, input_len_dim2, input_len_dim3) else: # tf inputs = np.random.rand(num_samples, input_len_dim1, 
input_len_dim2, input_len_dim3, stack_size) # basic test layer_test(convolutional.UpSampling3D, kwargs={'size': (2, 2, 2), 'data_format': data_format}, input_shape=inputs.shape) for length_dim1 in [2, 3]: for length_dim2 in [2]: for length_dim3 in [3]: layer = convolutional.UpSampling3D( size=(length_dim1, length_dim2, length_dim3), data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) if data_format == 'channels_first': assert np_output.shape[2] == length_dim1 * input_len_dim1 assert np_output.shape[3] == length_dim2 * input_len_dim2 assert np_output.shape[4] == length_dim3 * input_len_dim3 else: # tf assert np_output.shape[1] == length_dim1 * input_len_dim1 assert np_output.shape[2] == length_dim2 * input_len_dim2 assert np_output.shape[3] == length_dim3 * input_len_dim3 # compare with numpy if data_format == 'channels_first': expected_out = np.repeat(inputs, length_dim1, axis=2) expected_out = np.repeat(expected_out, length_dim2, axis=3) expected_out = np.repeat(expected_out, length_dim3, axis=4) else: # tf expected_out = np.repeat(inputs, length_dim1, axis=1) expected_out = np.repeat(expected_out, length_dim2, axis=2) expected_out = np.repeat(expected_out, length_dim3, axis=3) assert_allclose(np_output, expected_out) @pytest.mark.skipif((K.backend() == 'cntk'), reason="cntk does not support slice to 0 dimension") def test_cropping_1d(): num_samples = 2 time_length = 4 input_len_dim1 = 2 inputs = np.random.rand(num_samples, time_length, input_len_dim1) layer_test(convolutional.Cropping1D, kwargs={'cropping': (2, 2)}, input_shape=inputs.shape) def test_cropping_2d(): num_samples = 2 stack_size = 2 input_len_dim1 = 9 input_len_dim2 = 9 cropping = ((2, 2), (3, 3)) for data_format in ['channels_first', 'channels_last']: if data_format == 'channels_first': inputs = np.random.rand(num_samples, stack_size, input_len_dim1, input_len_dim2) else: inputs = np.random.rand(num_samples, input_len_dim1, 
input_len_dim2, stack_size) # basic test layer_test(convolutional.Cropping2D, kwargs={'cropping': cropping, 'data_format': data_format}, input_shape=inputs.shape) # correctness test layer = convolutional.Cropping2D(cropping=cropping, data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) # compare with numpy if data_format == 'channels_first': expected_out = inputs[:, :, cropping[0][0]: -cropping[0][1], cropping[1][0]: -cropping[1][1]] else: expected_out = inputs[:, cropping[0][0]: -cropping[0][1], cropping[1][0]: -cropping[1][1], :] assert_allclose(np_output, expected_out) for data_format in ['channels_first', 'channels_last']: if data_format == 'channels_first': inputs = np.random.rand(num_samples, stack_size, input_len_dim1, input_len_dim2) else: inputs = np.random.rand(num_samples, input_len_dim1, input_len_dim2, stack_size) # another correctness test (no cropping) cropping = ((0, 0), (0, 0)) layer = convolutional.Cropping2D(cropping=cropping, data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) # compare with input assert_allclose(np_output, inputs) # Test invalid use cases with pytest.raises(ValueError): layer = convolutional.Cropping2D(cropping=((1, 1),)) with pytest.raises(ValueError): layer = convolutional.Cropping2D(cropping=lambda x: x) def test_cropping_3d(): num_samples = 2 stack_size = 2 input_len_dim1 = 8 input_len_dim2 = 8 input_len_dim3 = 8 cropping = ((2, 2), (3, 3), (2, 3)) for data_format in ['channels_last', 'channels_first']: if data_format == 'channels_first': inputs = np.random.rand(num_samples, stack_size, input_len_dim1, input_len_dim2, input_len_dim3) else: inputs = np.random.rand(num_samples, input_len_dim1, input_len_dim2, input_len_dim3, stack_size) # basic test layer_test(convolutional.Cropping3D, kwargs={'cropping': cropping, 'data_format': data_format}, input_shape=inputs.shape) # correctness test layer = 
convolutional.Cropping3D(cropping=cropping, data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) # compare with numpy if data_format == 'channels_first': expected_out = inputs[:, :, cropping[0][0]: -cropping[0][1], cropping[1][0]: -cropping[1][1], cropping[2][0]: -cropping[2][1]] else: expected_out = inputs[:, cropping[0][0]: -cropping[0][1], cropping[1][0]: -cropping[1][1], cropping[2][0]: -cropping[2][1], :] assert_allclose(np_output, expected_out) for data_format in ['channels_last', 'channels_first']: if data_format == 'channels_first': inputs = np.random.rand(num_samples, stack_size, input_len_dim1, input_len_dim2, input_len_dim3) else: inputs = np.random.rand(num_samples, input_len_dim1, input_len_dim2, input_len_dim3, stack_size) # another correctness test (no cropping) cropping = ((0, 0), (0, 0), (0, 0)) layer = convolutional.Cropping3D(cropping=cropping, data_format=data_format) layer.build(inputs.shape) outputs = layer(K.variable(inputs)) np_output = K.eval(outputs) # compare with input assert_allclose(np_output, inputs) # Test invalid use cases with pytest.raises(ValueError): layer = convolutional.Cropping3D(cropping=((1, 1),)) with pytest.raises(ValueError): layer = convolutional.Cropping3D(cropping=lambda x: x) @pytest.mark.skipif((K.backend() == 'cntk'), reason='CNTK does not support float64') @pytest.mark.parametrize( 'input_shape,conv_class', [((2, 4, 2), convolutional.Conv1D), ((2, 4, 4, 2), convolutional.Conv2D), ((2, 4, 4, 4, 2), convolutional.Conv3D)] ) def test_conv_float64(input_shape, conv_class): kernel_size = 3 strides = 1 filters = 3 K.set_floatx('float64') layer_test(conv_class, kwargs={'filters': filters, 'kernel_size': kernel_size, 'padding': 'valid', 'strides': strides}, input_shape=input_shape) K.set_floatx('float32') if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/layers/noise_test.py0000644000000000116100000000161713354530144020624 0ustar 
rooteng00000000000000import pytest from keras.utils.test_utils import layer_test from keras.layers import noise from keras import backend as K @pytest.mark.skipif((K.backend() == 'cntk'), reason="cntk does not support it yet") def test_GaussianNoise(): layer_test(noise.GaussianNoise, kwargs={'stddev': 1.}, input_shape=(3, 2, 3)) @pytest.mark.skipif((K.backend() == 'cntk'), reason="cntk does not support it yet") def test_GaussianDropout(): layer_test(noise.GaussianDropout, kwargs={'rate': 0.5}, input_shape=(3, 2, 3)) @pytest.mark.skipif((K.backend() == 'cntk'), reason="cntk does not support it yet") def test_AlphaDropout(): layer_test(noise.AlphaDropout, kwargs={'rate': 0.1}, input_shape=(3, 2, 3)) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/constraints_test.py0000644000000000116100000000506313354530144020556 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose from keras import backend as K from keras import constraints def get_test_values(): return [0.1, 0.5, 3, 8, 1e-7] def get_example_array(): np.random.seed(3537) example_array = np.random.random((100, 100)) * 100. - 50. example_array[0, 0] = 0. # 0 could possibly cause trouble return example_array def test_serialization(): all_activations = ['max_norm', 'non_neg', 'unit_norm', 'min_max_norm'] for name in all_activations: fn = constraints.get(name) ref_fn = getattr(constraints, name)() assert fn.__class__ == ref_fn.__class__ config = constraints.serialize(fn) fn = constraints.deserialize(config) assert fn.__class__ == ref_fn.__class__ def test_max_norm(): array = get_example_array() for m in get_test_values(): norm_instance = constraints.max_norm(m) normed = norm_instance(K.variable(array)) assert(np.all(K.eval(normed) < m)) # a more explicit example norm_instance = constraints.max_norm(2.0) x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T x_normed_target = np.array([[0, 0, 0], [1.0, 0, 0], [2.0, 0, 0], [2. 
/ np.sqrt(3), 2. / np.sqrt(3), 2. / np.sqrt(3)]]).T x_normed_actual = K.eval(norm_instance(K.variable(x))) assert_allclose(x_normed_actual, x_normed_target, rtol=1e-05) def test_non_neg(): non_neg_instance = constraints.non_neg() normed = non_neg_instance(K.variable(get_example_array())) assert(np.all(np.min(K.eval(normed), axis=1) == 0.)) def test_unit_norm(): unit_norm_instance = constraints.unit_norm() normalized = unit_norm_instance(K.variable(get_example_array())) norm_of_normalized = np.sqrt(np.sum(K.eval(normalized) ** 2, axis=0)) # In the unit norm constraint, it should be equal to 1. difference = norm_of_normalized - 1. largest_difference = np.max(np.abs(difference)) assert(np.abs(largest_difference) < 10e-5) def test_min_max_norm(): array = get_example_array() for m in get_test_values(): norm_instance = constraints.min_max_norm(min_value=m, max_value=m * 2) normed = norm_instance(K.variable(array)) value = K.eval(normed) l2 = np.sqrt(np.sum(np.square(value), axis=0)) assert not l2[l2 < m] assert not l2[l2 > m * 2 + 1e-5] if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/optimizers_test.py0000644000000000116100000001140513354530144020411 0ustar rooteng00000000000000from __future__ import print_function import pytest import numpy as np from numpy.testing import assert_allclose from keras.utils import test_utils from keras import optimizers, Input from keras.models import Sequential, Model from keras.layers.core import Dense, Activation, Lambda from keras.utils.np_utils import to_categorical from keras import backend as K num_classes = 2 def get_test_data(): np.random.seed(1337) (x_train, y_train), _ = test_utils.get_test_data(num_train=1000, num_test=200, input_shape=(10,), classification=True, num_classes=num_classes) y_train = to_categorical(y_train) return x_train, y_train def _test_optimizer(optimizer, target=0.75): x_train, y_train = get_test_data() model = Sequential() model.add(Dense(10, input_shape=(x_train.shape[1],))) 
model.add(Activation('relu')) model.add(Dense(y_train.shape[1])) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=2, batch_size=16, verbose=0) assert history.history['acc'][-1] >= target config = optimizers.serialize(optimizer) optim = optimizers.deserialize(config) new_config = optimizers.serialize(optim) new_config['class_name'] = new_config['class_name'].lower() assert config == new_config # Test constraints. model = Sequential() dense = Dense(10, input_shape=(x_train.shape[1],), kernel_constraint=lambda x: 0. * x + 1., bias_constraint=lambda x: 0. * x + 2.,) model.add(dense) model.add(Activation('relu')) model.add(Dense(y_train.shape[1])) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) model.train_on_batch(x_train[:10], y_train[:10]) kernel, bias = dense.get_weights() assert_allclose(kernel, 1.) assert_allclose(bias, 2.) 
@pytest.mark.skipif((K.backend() != 'tensorflow'), reason="Only Tensorflow raises a " "ValueError if the gradient is null.") def test_no_grad(): inp = Input([3]) x = Dense(10)(inp) x = Lambda(lambda l: 1.0 * K.reshape(K.cast(K.argmax(l), 'float32'), [-1, 1]), output_shape=lambda x: [x[0], 1])(x) mod = Model(inp, x) mod.compile('sgd', 'mse') with pytest.raises(ValueError): mod.fit(np.zeros([10, 3]), np.zeros([10, 1], np.float32), batch_size=10, epochs=10) def test_sgd(): sgd = optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True) _test_optimizer(sgd) def test_rmsprop(): _test_optimizer(optimizers.RMSprop()) _test_optimizer(optimizers.RMSprop(decay=1e-3)) def test_adagrad(): _test_optimizer(optimizers.Adagrad()) _test_optimizer(optimizers.Adagrad(decay=1e-3)) def test_adadelta(): _test_optimizer(optimizers.Adadelta(), target=0.6) _test_optimizer(optimizers.Adadelta(decay=1e-3), target=0.6) def test_adam(): _test_optimizer(optimizers.Adam()) _test_optimizer(optimizers.Adam(decay=1e-3)) def test_adamax(): _test_optimizer(optimizers.Adamax()) _test_optimizer(optimizers.Adamax(decay=1e-3)) def test_nadam(): _test_optimizer(optimizers.Nadam()) def test_adam_amsgrad(): _test_optimizer(optimizers.Adam(amsgrad=True)) _test_optimizer(optimizers.Adam(amsgrad=True, decay=1e-3)) def test_clipnorm(): sgd = optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=0.5) _test_optimizer(sgd) def test_clipvalue(): sgd = optimizers.SGD(lr=0.01, momentum=0.9, clipvalue=0.5) _test_optimizer(sgd) @pytest.mark.skipif((K.backend() != 'tensorflow'), reason='Requires TensorFlow backend') def test_tfoptimizer(): from keras import constraints from tensorflow import train optimizer = optimizers.TFOptimizer(train.AdamOptimizer()) model = Sequential() model.add(Dense(num_classes, input_shape=(3,), kernel_constraint=constraints.MaxNorm(1))) model.compile(loss='mean_squared_error', optimizer=optimizer) model.fit(np.random.random((5, 3)), np.random.random((5, num_classes)), epochs=1, batch_size=5, verbose=0) # 
not supported with pytest.raises(NotImplementedError): optimizer.weights with pytest.raises(NotImplementedError): optimizer.get_config() with pytest.raises(NotImplementedError): optimizer.from_config(None) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/initializers_test.py0000644000000000116100000001147013342055016020711 0ustar rooteng00000000000000import pytest import numpy as np from keras import initializers from keras import backend as K # 2D tensor test fixture FC_SHAPE = (200, 100) # 4D convolution in th order. This shape has the same effective shape as FC_SHAPE CONV_SHAPE = (25, 25, 20, 20) def _runner(init, shape, target_mean=None, target_std=None, target_max=None, target_min=None): variable = K.variable(init(shape)) output = K.get_value(variable) lim = 3e-2 if target_std is not None: assert abs(output.std() - target_std) < lim if target_mean is not None: assert abs(output.mean() - target_mean) < lim if target_max is not None: assert abs(output.max() - target_max) < lim if target_min is not None: assert abs(output.min() - target_min) < lim @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_uniform(tensor_shape): _runner(initializers.RandomUniform(minval=-1, maxval=1), tensor_shape, target_mean=0., target_max=1, target_min=-1) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_normal(tensor_shape): _runner(initializers.RandomNormal(mean=0, stddev=1), tensor_shape, target_mean=0., target_std=1) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_truncated_normal(tensor_shape): _runner(initializers.TruncatedNormal(mean=0, stddev=1), tensor_shape, target_mean=0., target_max=2, target_min=-2) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_constant(tensor_shape): _runner(initializers.Constant(2), tensor_shape, target_mean=2, target_max=2, target_min=2) 
@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_lecun_uniform(tensor_shape): fan_in, _ = initializers._compute_fans(tensor_shape) std = np.sqrt(1. / fan_in) _runner(initializers.lecun_uniform(), tensor_shape, target_mean=0., target_std=std) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_glorot_uniform(tensor_shape): fan_in, fan_out = initializers._compute_fans(tensor_shape) std = np.sqrt(2. / (fan_in + fan_out)) _runner(initializers.glorot_uniform(), tensor_shape, target_mean=0., target_std=std) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_he_uniform(tensor_shape): fan_in, _ = initializers._compute_fans(tensor_shape) std = np.sqrt(2. / fan_in) _runner(initializers.he_uniform(), tensor_shape, target_mean=0., target_std=std) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_lecun_normal(tensor_shape): fan_in, _ = initializers._compute_fans(tensor_shape) std = np.sqrt(1. / fan_in) _runner(initializers.lecun_normal(), tensor_shape, target_mean=0., target_std=std) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_glorot_normal(tensor_shape): fan_in, fan_out = initializers._compute_fans(tensor_shape) std = np.sqrt(2. / (fan_in + fan_out)) _runner(initializers.glorot_normal(), tensor_shape, target_mean=0., target_std=std) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_he_normal(tensor_shape): fan_in, _ = initializers._compute_fans(tensor_shape) std = np.sqrt(2. / fan_in) _runner(initializers.he_normal(), tensor_shape, target_mean=0., target_std=std) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_orthogonal(tensor_shape): _runner(initializers.orthogonal(), tensor_shape, target_mean=0.) 
@pytest.mark.parametrize('tensor_shape', [(100, 100), (10, 20), (30, 80), (1, 2, 3, 4)], ids=['FC', 'RNN', 'RNN_INVALID', 'CONV']) def test_identity(tensor_shape): if len(tensor_shape) > 2 or max(tensor_shape) % min(tensor_shape) != 0: with pytest.raises(ValueError): _runner(initializers.identity(), tensor_shape, target_mean=1. / tensor_shape[0], target_max=1.) else: _runner(initializers.identity(), tensor_shape, target_mean=1. / tensor_shape[0], target_max=1.) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_zero(tensor_shape): _runner(initializers.zeros(), tensor_shape, target_mean=0., target_max=0.) @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) def test_one(tensor_shape): _runner(initializers.ones(), tensor_shape, target_mean=1., target_max=1.) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/regularizers_test.py0000644000000000116100000000676213240665765020750 0ustar rooteng00000000000000import pytest from keras.models import Sequential, Model from keras.layers import Dense, Input, Average from keras.utils import np_utils from keras.utils import test_utils from keras import regularizers from keras import backend as K data_dim = 5 num_classes = 2 batch_size = 10 def get_data(): (x_train, y_train), _ = test_utils.get_test_data( num_train=batch_size, num_test=batch_size, input_shape=(data_dim,), classification=True, num_classes=num_classes) y_train = np_utils.to_categorical(y_train, num_classes) return x_train, y_train def create_model(kernel_regularizer=None, activity_regularizer=None): model = Sequential() model.add(Dense(num_classes, kernel_regularizer=kernel_regularizer, activity_regularizer=activity_regularizer, input_shape=(data_dim,))) return model def create_multi_input_model_from(layer1, layer2): input_1 = Input(shape=(data_dim,)) input_2 = Input(shape=(data_dim,)) out1 = layer1(input_1) out2 = layer2(input_2) out = Average()([out1, out2]) model = 
Model([input_1, input_2], out) model.add_loss(K.mean(out2)) model.add_loss(1) model.add_loss(1) return model def test_kernel_regularization(): x_train, y_train = get_data() for reg in [regularizers.l1(), regularizers.l2(), regularizers.l1_l2()]: model = create_model(kernel_regularizer=reg) model.compile(loss='categorical_crossentropy', optimizer='sgd') assert len(model.losses) == 1 model.train_on_batch(x_train, y_train) def test_activity_regularization(): x_train, y_train = get_data() for reg in [regularizers.l1(), regularizers.l2()]: model = create_model(activity_regularizer=reg) model.compile(loss='categorical_crossentropy', optimizer='sgd') assert len(model.losses) == 1 model.train_on_batch(x_train, y_train) def test_regularization_shared_layer(): dense_layer = Dense(num_classes, kernel_regularizer=regularizers.l1(), activity_regularizer=regularizers.l1()) model = create_multi_input_model_from(dense_layer, dense_layer) model.compile(loss='categorical_crossentropy', optimizer='sgd') assert len(model.losses) == 6 def test_regularization_shared_model(): dense_layer = Dense(num_classes, kernel_regularizer=regularizers.l1(), activity_regularizer=regularizers.l1()) input_tensor = Input(shape=(data_dim,)) dummy_model = Model(input_tensor, dense_layer(input_tensor)) model = create_multi_input_model_from(dummy_model, dummy_model) model.compile(loss='categorical_crossentropy', optimizer='sgd') assert len(model.losses) == 6 def test_regularization_shared_layer_in_different_models(): shared_dense = Dense(num_classes, kernel_regularizer=regularizers.l1(), activity_regularizer=regularizers.l1()) models = [] for _ in range(2): input_tensor = Input(shape=(data_dim,)) unshared_dense = Dense(num_classes, kernel_regularizer=regularizers.l1()) out = unshared_dense(shared_dense(input_tensor)) models.append(Model(input_tensor, out)) model = create_multi_input_model_from(*models) model.compile(loss='categorical_crossentropy', optimizer='sgd') assert len(model.losses) == 8 if __name__ 
== '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/legacy/0000755000000000116100000000000013355226624016044 5ustar rooteng00000000000000Keras-2.2.4/tests/keras/legacy/layers_test.py0000644000000000116100000000246413354530144020754 0ustar rooteng00000000000000import pytest from keras.utils.test_utils import layer_test from keras.legacy import layers as legacy_layers from keras import regularizers from keras import constraints def test_highway(): layer_test(legacy_layers.Highway, kwargs={}, input_shape=(3, 2)) layer_test(legacy_layers.Highway, kwargs={'W_regularizer': regularizers.l2(0.01), 'b_regularizer': regularizers.l1(0.01), 'activity_regularizer': regularizers.l2(0.01), 'W_constraint': constraints.MaxNorm(1), 'b_constraint': constraints.MaxNorm(1)}, input_shape=(3, 2)) def test_maxout_dense(): layer_test(legacy_layers.MaxoutDense, kwargs={'output_dim': 3}, input_shape=(3, 2)) layer_test(legacy_layers.MaxoutDense, kwargs={'output_dim': 3, 'W_regularizer': regularizers.l2(0.01), 'b_regularizer': regularizers.l1(0.01), 'activity_regularizer': regularizers.l2(0.01), 'W_constraint': constraints.MaxNorm(1), 'b_constraint': constraints.MaxNorm(1)}, input_shape=(3, 2)) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/legacy/interface_test.py0000644000000000116100000013246713354530144021424 0ustar rooteng00000000000000import pytest import json import keras import numpy as np def test_dense_legacy_interface(): old_layer = keras.layers.Dense(input_dim=3, output_dim=2, name='d') new_layer = keras.layers.Dense(2, input_shape=(3,), name='d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Dense(2, bias=False, init='normal', W_regularizer='l1', W_constraint='maxnorm', name='d') new_layer = keras.layers.Dense(2, use_bias=False, kernel_initializer='normal', kernel_regularizer='l1', kernel_constraint='max_norm', name='d') assert json.dumps(old_layer.get_config()) == 
json.dumps(new_layer.get_config()) old_layer = keras.layers.Dense(2, bias=True, b_regularizer='l1', b_constraint='maxnorm', name='d') new_layer = keras.layers.Dense(2, use_bias=True, bias_regularizer='l1', bias_constraint='max_norm', name='d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_dropout_legacy_interface(): old_layer = keras.layers.Dropout(p=3, name='drop') new_layer1 = keras.layers.Dropout(rate=3, name='drop') new_layer2 = keras.layers.Dropout(3, name='drop') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer1.get_config()) assert json.dumps(old_layer.get_config()) == json.dumps(new_layer2.get_config()) def test_embedding_legacy_interface(): old_layer = keras.layers.Embedding(4, 2, name='d') new_layer = keras.layers.Embedding(output_dim=2, input_dim=4, name='d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Embedding(input_dim=4, output_dim=2, name='d', init='normal', W_regularizer='l1', W_constraint='maxnorm') new_layer = keras.layers.Embedding(input_dim=4, output_dim=2, name='d', embeddings_initializer='normal', embeddings_regularizer='l1', embeddings_constraint='max_norm') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Embedding(1, 1, dropout=0.0, name='d') new_layer = keras.layers.Embedding(1, 1, name='d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_maxpooling1d_legacy_interface(): old_layer = keras.layers.MaxPool1D(pool_length=2, border_mode='valid', name='maxpool1d') new_layer = keras.layers.MaxPool1D(pool_size=2, padding='valid', name='maxpool1d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.MaxPool1D(2, padding='valid', name='maxpool1d') new_layer = keras.layers.MaxPool1D(pool_size=2, padding='valid', name='maxpool1d') assert json.dumps(old_layer.get_config()) == 
json.dumps(new_layer.get_config()) def test_avgpooling1d_legacy_interface(): old_layer = keras.layers.AvgPool1D(pool_length=2, border_mode='valid', name='d') new_layer = keras.layers.AvgPool1D(pool_size=2, padding='valid', name='d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.AvgPool1D(2, padding='valid', name='d') new_layer = keras.layers.AvgPool1D(pool_size=2, padding='valid', name='d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_prelu_legacy_interface(): old_layer = keras.layers.PReLU(init='zero', name='p') new_layer = keras.layers.PReLU('zero', name='p') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_gaussiannoise_legacy_interface(): old_layer = keras.layers.GaussianNoise(sigma=0.5, name='gn') new_layer = keras.layers.GaussianNoise(stddev=0.5, name='gn') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_lstm_legacy_interface(): old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d') new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d', consume_less='mem') new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d', implementation=1) assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.LSTM(input_dim=5, input_length=3, output_dim=2, name='d', consume_less='mem') new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d', implementation=1) assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.LSTM(input_dim=5, output_dim=2, name='d', consume_less='mem') new_layer = keras.layers.LSTM(2, input_shape=[None, 5], name='d', implementation=1) assert json.dumps(old_layer.get_config()) == 
json.dumps(new_layer.get_config()) old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d', consume_less='gpu') new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d', implementation=2) assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.LSTM(2, init='normal', inner_init='glorot_uniform', forget_bias_init='one', inner_activation='hard_sigmoid', W_regularizer='l1', U_regularizer='l1', b_regularizer='l1', dropout_W=0.1, dropout_U=0.1, name='LSTM') new_layer = keras.layers.LSTM(2, kernel_initializer='normal', recurrent_initializer='glorot_uniform', unit_forget_bias=True, recurrent_activation='hard_sigmoid', kernel_regularizer='l1', recurrent_regularizer='l1', bias_regularizer='l1', dropout=0.1, recurrent_dropout=0.1, name='LSTM') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.LSTM(2, init='normal', inner_init='glorot_uniform', forget_bias_init='zero', inner_activation='hard_sigmoid', W_regularizer='l1', U_regularizer='l1', b_regularizer='l1', dropout_W=0.1, dropout_U=0.1, name='LSTM') new_layer = keras.layers.LSTM(2, kernel_initializer='normal', recurrent_initializer='glorot_uniform', unit_forget_bias=True, recurrent_activation='hard_sigmoid', kernel_regularizer='l1', recurrent_regularizer='l1', bias_regularizer='l1', dropout=0.1, recurrent_dropout=0.1, name='LSTM') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_simplernn_legacy_interface(): old_layer = keras.layers.SimpleRNN(input_shape=[3, 5], output_dim=2, name='d') new_layer = keras.layers.SimpleRNN(2, input_shape=[3, 5], name='d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.SimpleRNN(2, init='normal', inner_init='glorot_uniform', W_regularizer='l1', U_regularizer='l1', b_regularizer='l1', dropout_W=0.1, dropout_U=0.1, name='SimpleRNN') new_layer = keras.layers.SimpleRNN(2, 
kernel_initializer='normal', recurrent_initializer='glorot_uniform', kernel_regularizer='l1', recurrent_regularizer='l1', bias_regularizer='l1', dropout=0.1, recurrent_dropout=0.1, name='SimpleRNN') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_gru_legacy_interface(): old_layer = keras.layers.GRU(input_shape=[3, 5], output_dim=2, name='d') new_layer = keras.layers.GRU(2, input_shape=[3, 5], name='d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.GRU(2, init='normal', inner_init='glorot_uniform', inner_activation='hard_sigmoid', W_regularizer='l1', U_regularizer='l1', b_regularizer='l1', dropout_W=0.1, dropout_U=0.1, name='GRU') new_layer = keras.layers.GRU(2, kernel_initializer='normal', recurrent_initializer='glorot_uniform', recurrent_activation='hard_sigmoid', kernel_regularizer='l1', recurrent_regularizer='l1', bias_regularizer='l1', dropout=0.1, recurrent_dropout=0.1, name='GRU') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_gaussiandropout_legacy_interface(): old_layer = keras.layers.GaussianDropout(p=0.6, name='drop') new_layer1 = keras.layers.GaussianDropout(rate=0.6, name='drop') new_layer2 = keras.layers.GaussianDropout(0.6, name='drop') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer1.get_config()) assert json.dumps(old_layer.get_config()) == json.dumps(new_layer2.get_config()) def test_maxpooling2d_legacy_interface(): old_layer = keras.layers.MaxPooling2D( pool_size=(2, 2), border_mode='valid', name='maxpool2d') new_layer = keras.layers.MaxPool2D( pool_size=2, padding='valid', name='maxpool2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.MaxPooling2D((2, 2), 2, 'valid', name='maxpool2d') new_layer = keras.layers.MaxPool2D( pool_size=2, strides=2, padding='valid', name='maxpool2d') assert json.dumps(old_layer.get_config()) == 
json.dumps(new_layer.get_config()) old_layer = keras.layers.MaxPooling2D( (2, 2), padding='valid', dim_ordering='tf', name='maxpool2d') new_layer = keras.layers.MaxPool2D( pool_size=2, padding='valid', data_format='channels_last', name='maxpool2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.MaxPooling2D( (2, 2), padding='valid', dim_ordering='th', name='maxpool2d') new_layer = keras.layers.MaxPool2D( pool_size=2, padding='valid', data_format='channels_first', name='maxpool2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.MaxPooling2D( (2, 2), padding='valid', dim_ordering='default', name='maxpool2d') new_layer = keras.layers.MaxPool2D( pool_size=2, padding='valid', name='maxpool2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_avgpooling2d_legacy_interface(): old_layer = keras.layers.AveragePooling2D( pool_size=(2, 2), border_mode='valid', name='avgpooling2d') new_layer = keras.layers.AvgPool2D( pool_size=(2, 2), padding='valid', name='avgpooling2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.AveragePooling2D( (2, 2), (2, 2), 'valid', name='avgpooling2d') new_layer = keras.layers.AvgPool2D( pool_size=(2, 2), strides=(2, 2), padding='valid', name='avgpooling2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.AveragePooling2D( (2, 2), padding='valid', dim_ordering='tf', name='avgpooling2d') new_layer = keras.layers.AvgPool2D( pool_size=2, padding='valid', data_format='channels_last', name='avgpooling2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.AveragePooling2D( (2, 2), padding='valid', dim_ordering='th', name='avgpooling2d') new_layer = keras.layers.AvgPool2D( pool_size=2, padding='valid', data_format='channels_first', 
name='avgpooling2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.AveragePooling2D( (2, 2), padding='valid', dim_ordering='default', name='avgpooling2d') new_layer = keras.layers.AvgPool2D( pool_size=2, padding='valid', name='avgpooling2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_maxpooling3d_legacy_interface(): old_layer = keras.layers.MaxPooling3D( pool_size=(2, 2, 2), border_mode='valid', name='maxpool3d') new_layer = keras.layers.MaxPool3D( pool_size=(2, 2, 2), padding='valid', name='maxpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.MaxPooling3D( (2, 2, 2), (2, 2, 2), 'valid', name='maxpool3d') new_layer = keras.layers.MaxPool3D( pool_size=(2, 2, 2), strides=(2, 2, 2), padding='valid', name='maxpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.MaxPooling3D( (2, 2, 2), padding='valid', dim_ordering='tf', name='maxpool3d') new_layer = keras.layers.MaxPool3D( pool_size=(2, 2, 2), padding='valid', data_format='channels_last', name='maxpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.MaxPooling3D( (2, 2, 2), padding='valid', dim_ordering='th', name='maxpool3d') new_layer = keras.layers.MaxPool3D( pool_size=(2, 2, 2), padding='valid', data_format='channels_first', name='maxpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.MaxPooling3D( (2, 2, 2), padding='valid', dim_ordering='default', name='maxpool3d') new_layer = keras.layers.MaxPool3D( pool_size=(2, 2, 2), padding='valid', name='maxpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_avgpooling3d_legacy_interface(): old_layer = keras.layers.AveragePooling3D( pool_size=(2, 2, 2), border_mode='valid', 
name='avgpooling3d') new_layer = keras.layers.AvgPool3D( pool_size=(2, 2, 2), padding='valid', name='avgpooling3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.AveragePooling3D( (2, 2, 2), (2, 2, 2), 'valid', name='avgpooling3d') new_layer = keras.layers.AvgPool3D( pool_size=(2, 2, 2), strides=(2, 2, 2), padding='valid', name='avgpooling3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.AveragePooling3D( (2, 2, 2), padding='valid', dim_ordering='tf', name='avgpooling3d') new_layer = keras.layers.AvgPool3D( pool_size=(2, 2, 2), padding='valid', data_format='channels_last', name='avgpooling3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.AveragePooling3D( (2, 2, 2), padding='valid', dim_ordering='th', name='avgpooling3d') new_layer = keras.layers.AvgPool3D( pool_size=(2, 2, 2), padding='valid', data_format='channels_first', name='avgpooling3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.AveragePooling3D( (2, 2, 2), padding='valid', dim_ordering='default', name='avgpooling3d') new_layer = keras.layers.AvgPool3D( pool_size=(2, 2, 2), padding='valid', name='avgpooling3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_global_maxpooling2d_legacy_interface(): old_layer = keras.layers.GlobalMaxPooling2D(dim_ordering='tf', name='global_maxpool2d') new_layer = keras.layers.GlobalMaxPool2D(data_format='channels_last', name='global_maxpool2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.GlobalMaxPooling2D(dim_ordering='th', name='global_maxpool2d') new_layer = keras.layers.GlobalMaxPool2D(data_format='channels_first', name='global_maxpool2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = 
keras.layers.GlobalMaxPooling2D(dim_ordering='default', name='global_maxpool2d') new_layer = keras.layers.GlobalMaxPool2D(name='global_maxpool2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_global_avgpooling2d_legacy_interface(): old_layer = keras.layers.GlobalAveragePooling2D(dim_ordering='tf', name='global_avgpool2d') new_layer = keras.layers.GlobalAvgPool2D(data_format='channels_last', name='global_avgpool2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.GlobalAveragePooling2D(dim_ordering='th', name='global_avgpool2d') new_layer = keras.layers.GlobalAvgPool2D(data_format='channels_first', name='global_avgpool2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.GlobalAveragePooling2D(dim_ordering='default', name='global_avgpool2d') new_layer = keras.layers.GlobalAvgPool2D(name='global_avgpool2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_global_maxpooling3d_legacy_interface(): old_layer = keras.layers.GlobalMaxPooling3D(dim_ordering='tf', name='global_maxpool3d') new_layer = keras.layers.GlobalMaxPool3D(data_format='channels_last', name='global_maxpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.GlobalMaxPooling3D(dim_ordering='th', name='global_maxpool3d') new_layer = keras.layers.GlobalMaxPool3D(data_format='channels_first', name='global_maxpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.GlobalMaxPooling3D(dim_ordering='default', name='global_maxpool3d') new_layer = keras.layers.GlobalMaxPool3D(name='global_maxpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_global_avgpooling3d_legacy_interface(): old_layer = keras.layers.GlobalAveragePooling3D(dim_ordering='tf', 
name='global_avgpool3d') new_layer = keras.layers.GlobalAvgPool3D(data_format='channels_last', name='global_avgpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.GlobalAveragePooling3D(dim_ordering='th', name='global_avgpool3d') new_layer = keras.layers.GlobalAvgPool3D(data_format='channels_first', name='global_avgpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.GlobalAveragePooling3D(dim_ordering='default', name='global_avgpool3d') new_layer = keras.layers.GlobalAvgPool3D(name='global_avgpool3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_upsampling1d_legacy_interface(): old_layer = keras.layers.UpSampling1D(length=3, name='us1d') new_layer_1 = keras.layers.UpSampling1D(size=3, name='us1d') new_layer_2 = keras.layers.UpSampling1D(3, name='us1d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer_1.get_config()) assert json.dumps(old_layer.get_config()) == json.dumps(new_layer_2.get_config()) def test_upsampling2d_legacy_interface(): old_layer = keras.layers.UpSampling2D((2, 2), dim_ordering='tf', name='us2d') new_layer = keras.layers.UpSampling2D((2, 2), data_format='channels_last', name='us2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_upsampling3d_legacy_interface(): old_layer = keras.layers.UpSampling3D((2, 2, 2), dim_ordering='tf', name='us3d') new_layer = keras.layers.UpSampling3D((2, 2, 2), data_format='channels_last', name='us3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_conv2d_legacy_interface(): old_layer = keras.layers.Convolution2D(5, 3, 3, name='conv') new_layer = keras.layers.Conv2D(5, (3, 3), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Convolution2D(5, 3, nb_col=3, name='conv') new_layer = 
keras.layers.Conv2D(5, (3, 3), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Convolution2D(5, nb_row=3, nb_col=3, name='conv') new_layer = keras.layers.Conv2D(5, (3, 3), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Convolution2D(5, 3, 3, init='normal', subsample=(2, 2), border_mode='valid', dim_ordering='th', W_regularizer='l1', b_regularizer='l2', W_constraint='maxnorm', b_constraint='unitnorm', name='conv') new_layer = keras.layers.Conv2D(5, (3, 3), kernel_initializer='normal', strides=(2, 2), padding='valid', kernel_regularizer='l1', bias_regularizer='l2', kernel_constraint='max_norm', bias_constraint='unit_norm', data_format='channels_first', name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_deconv2d_legacy_interface(): old_layer = keras.layers.Deconvolution2D(5, 3, 3, (6, 7, 5), name='deconv') new_layer = keras.layers.Conv2DTranspose(5, (3, 3), name='deconv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Deconvolution2D(5, 3, 3, output_shape=(6, 7, 5), name='deconv') new_layer = keras.layers.Conv2DTranspose(5, (3, 3), name='deconv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Deconvolution2D(5, 3, nb_col=3, output_shape=(6, 7, 5), name='deconv') new_layer = keras.layers.Conv2DTranspose(5, (3, 3), name='deconv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Deconvolution2D(5, nb_row=3, nb_col=3, output_shape=(6, 7, 5), name='deconv') new_layer = keras.layers.Conv2DTranspose(5, (3, 3), name='deconv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Deconvolution2D(5, 3, 3, output_shape=(6, 7, 5), init='normal', subsample=(2, 2), 
border_mode='valid', dim_ordering='th', W_regularizer='l1', b_regularizer='l2', W_constraint='maxnorm', b_constraint='unitnorm', name='conv') new_layer = keras.layers.Conv2DTranspose( 5, (3, 3), kernel_initializer='normal', strides=(2, 2), padding='valid', kernel_regularizer='l1', bias_regularizer='l2', kernel_constraint='max_norm', bias_constraint='unit_norm', data_format='channels_first', name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_conv1d_legacy_interface(): old_layer = keras.layers.Convolution1D(5, filter_length=3, input_dim=3, input_length=4, name='conv') new_layer = keras.layers.Conv1D(5, 3, name='conv', input_shape=(4, 3)) assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Convolution1D(5, 3, init='normal', subsample_length=2, border_mode='valid', W_regularizer='l1', b_regularizer='l2', W_constraint='maxnorm', b_constraint='unitnorm', name='conv') new_layer = keras.layers.Conv1D(5, 3, kernel_initializer='normal', strides=2, padding='valid', kernel_regularizer='l1', bias_regularizer='l2', kernel_constraint='max_norm', bias_constraint='unit_norm', name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_separable_conv2d_legacy_interface(): old_layer = keras.layers.SeparableConv2D(5, 3, 3, name='conv') new_layer = keras.layers.SeparableConv2D(5, (3, 3), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.SeparableConv2D(5, 3, nb_col=3, name='conv') new_layer = keras.layers.SeparableConv2D(5, (3, 3), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.SeparableConv2D(5, nb_row=3, nb_col=3, name='conv') new_layer = keras.layers.SeparableConv2D(5, (3, 3), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = 
keras.layers.SeparableConv2D(5, 3, 3, init='normal', subsample=(2, 2), border_mode='valid', dim_ordering='th', depthwise_regularizer='l1', b_regularizer='l2', depthwise_constraint='maxnorm', b_constraint='unitnorm', name='conv') new_layer = keras.layers.SeparableConv2D(5, (3, 3), depthwise_initializer='normal', pointwise_initializer='normal', strides=(2, 2), padding='valid', depthwise_regularizer='l1', bias_regularizer='l2', depthwise_constraint='max_norm', bias_constraint='unit_norm', data_format='channels_first', name='conv') old_config = json.dumps(old_layer.get_config()) new_config = json.dumps(new_layer.get_config()) assert old_config == new_config def test_conv3d_legacy_interface(): old_layer = keras.layers.Convolution3D(5, 3, 3, 4, name='conv') new_layer = keras.layers.Conv3D(5, (3, 3, 4), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Convolution3D(5, 3, 3, kernel_dim3=4, name='conv') new_layer = keras.layers.Conv3D(5, (3, 3, 4), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Convolution3D(5, 3, kernel_dim2=3, kernel_dim3=4, name='conv') new_layer = keras.layers.Conv3D(5, (3, 3, 4), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Convolution3D(5, kernel_dim1=3, kernel_dim2=3, kernel_dim3=4, name='conv') new_layer = keras.layers.Conv3D(5, (3, 3, 4), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.Convolution3D(5, 3, 3, 4, init='normal', subsample=(2, 2, 2), border_mode='valid', dim_ordering='th', W_regularizer='l1', b_regularizer='l2', W_constraint='maxnorm', b_constraint='unitnorm', name='conv') new_layer = keras.layers.Conv3D(5, (3, 3, 4), kernel_initializer='normal', strides=(2, 2, 2), padding='valid', kernel_regularizer='l1', bias_regularizer='l2', kernel_constraint='max_norm', 
bias_constraint='unit_norm', data_format='channels_first', name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_convlstm2d_legacy_interface(): old_layer = keras.layers.ConvLSTM2D(5, 3, 3, name='conv') new_layer = keras.layers.ConvLSTM2D(5, (3, 3), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.ConvLSTM2D(5, 3, nb_col=3, name='conv') new_layer = keras.layers.ConvLSTM2D(5, (3, 3), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.ConvLSTM2D(5, nb_row=3, nb_col=3, name='conv') new_layer = keras.layers.ConvLSTM2D(5, (3, 3), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.ConvLSTM2D(5, 3, 3, init='normal', inner_init='uniform', forget_bias_init='one', inner_activation='relu', subsample=(2, 2), border_mode='valid', dim_ordering='th', W_regularizer='l1', U_regularizer='l2', b_regularizer='l2', dropout_W=0.2, dropout_U=0.1, name='conv') new_layer = keras.layers.ConvLSTM2D(5, (3, 3), kernel_initializer='normal', recurrent_initializer='uniform', unit_forget_bias=True, recurrent_activation='relu', strides=(2, 2), padding='valid', kernel_regularizer='l1', recurrent_regularizer='l2', bias_regularizer='l2', data_format='channels_first', dropout=0.2, recurrent_dropout=0.1, name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_batchnorm_legacy_interface(): old_layer = keras.layers.BatchNormalization(mode=0, name='bn') new_layer = keras.layers.BatchNormalization(name='bn') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) old_layer = keras.layers.BatchNormalization(mode=0, beta_init='one', gamma_init='uniform', name='bn') new_layer = keras.layers.BatchNormalization(beta_initializer='ones', gamma_initializer='uniform', name='bn') assert 
json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_atrousconv1d_legacy_interface(): old_layer = keras.layers.AtrousConvolution1D(5, 3, init='normal', subsample_length=2, border_mode='valid', W_regularizer='l1', b_regularizer='l2', W_constraint='maxnorm', b_constraint='unitnorm', atrous_rate=2, name='conv') new_layer = keras.layers.Conv1D(5, 3, kernel_initializer='normal', strides=2, padding='valid', kernel_regularizer='l1', bias_regularizer='l2', kernel_constraint='max_norm', bias_constraint='unit_norm', dilation_rate=2, name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_atrousconv2d_legacy_interface(): old_layer = keras.layers.AtrousConvolution2D( 5, 3, 3, atrous_rate=(2, 2), init='normal', subsample=(2, 2), border_mode='valid', dim_ordering='th', W_regularizer='l1', b_regularizer='l2', W_constraint='maxnorm', b_constraint='unitnorm', name='conv') new_layer = keras.layers.Conv2D(5, (3, 3), kernel_initializer='normal', strides=(2, 2), padding='valid', kernel_regularizer='l1', bias_regularizer='l2', kernel_constraint='max_norm', bias_constraint='unit_norm', data_format='channels_first', dilation_rate=(2, 2), name='conv') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_zeropadding2d_legacy_interface(): old_layer = keras.layers.ZeroPadding2D(padding={'right_pad': 4, 'bottom_pad': 2, 'top_pad': 1, 'left_pad': 3}, dim_ordering='tf', name='zp2d') new_layer = keras.layers.ZeroPadding2D(((1, 2), (3, 4)), data_format='channels_last', name='zp2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_zeropadding3d_legacy_interface(): old_layer = keras.layers.ZeroPadding3D((2, 2, 2), dim_ordering='tf', name='zp3d') new_layer = keras.layers.ZeroPadding3D((2, 2, 2), data_format='channels_last', name='zp3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def 
test_cropping2d_legacy_interface(): old_layer = keras.layers.Cropping2D(dim_ordering='tf', name='c2d') new_layer = keras.layers.Cropping2D(data_format='channels_last', name='c2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_cropping3d_legacy_interface(): old_layer = keras.layers.Cropping3D(dim_ordering='tf', name='c3d') new_layer = keras.layers.Cropping3D(data_format='channels_last', name='c3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config()) def test_generator_methods_interface(): def train_generator(): x = np.random.randn(2, 2) y = np.random.randint(0, 2, size=[2, 1]) while True: yield (x, y) def val_generator(): x = np.random.randn(2, 2) y = np.random.randint(0, 2, size=[2, 1]) while True: yield (x, y) def pred_generator(): x = np.random.randn(1, 2) while True: yield x x = keras.layers.Input(shape=(2, )) y = keras.layers.Dense(2)(x) model = keras.models.Model(inputs=x, outputs=y) model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy']) model.fit_generator(generator=train_generator(), samples_per_epoch=1, validation_data=val_generator(), nb_val_samples=1, nb_worker=1, pickle_safe=True, max_q_size=3) model.evaluate_generator(generator=train_generator(), val_samples=2, nb_worker=1, pickle_safe=False, max_q_size=3) model.predict_generator(generator=pred_generator(), val_samples=2, nb_worker=1, pickle_safe=False, max_q_size=3) def test_spatialdropout1d_legacy_interface(): old_layer = keras.layers.SpatialDropout1D(p=0.6, name='sd1d') new_layer_1 = keras.layers.SpatialDropout1D(rate=0.6, name='sd1d') new_layer_2 = keras.layers.SpatialDropout1D(0.6, name='sd1d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer_1.get_config()) assert json.dumps(old_layer.get_config()) == json.dumps(new_layer_2.get_config()) def test_spatialdropout2d_legacy_interface(): old_layer = keras.layers.SpatialDropout2D(p=0.5, dim_ordering='tf', name='sd2d') 
new_layer_1 = keras.layers.SpatialDropout2D(rate=0.5, data_format='channels_last', name='sd2d') new_layer_2 = keras.layers.SpatialDropout2D(0.5, data_format='channels_last', name='sd2d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer_1.get_config()) assert json.dumps(old_layer.get_config()) == json.dumps(new_layer_2.get_config()) def test_spatialdropout3d_legacy_interface(): old_layer = keras.layers.SpatialDropout3D(p=0.5, dim_ordering='tf', name='sd3d') new_layer_1 = keras.layers.SpatialDropout3D(rate=0.5, data_format='channels_last', name='sd3d') new_layer_2 = keras.layers.SpatialDropout3D(0.5, data_format='channels_last', name='sd3d') assert json.dumps(old_layer.get_config()) == json.dumps(new_layer_1.get_config()) assert json.dumps(old_layer.get_config()) == json.dumps(new_layer_2.get_config()) def test_optimizer_get_updates_legacy_interface(): for optimizer_cls in [keras.optimizers.RMSprop, keras.optimizers.SGD, keras.optimizers.Adadelta, keras.optimizers.Adam, keras.optimizers.Adagrad, keras.optimizers.Nadam, keras.optimizers.Adamax]: optimizer = optimizer_cls() param = keras.backend.variable(0.) loss = keras.backend.mean(param) constraints = {param: lambda x: x} params = [param] optimizer.get_updates(params, constraints, loss) optimizer.get_updates(params, constraints, loss=loss) optimizer.get_updates(loss, params) optimizer.get_updates(loss, params=params) optimizer.get_updates(loss=loss, params=params) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/utils/0000755000000000116100000000000013355226624015740 5ustar rooteng00000000000000Keras-2.2.4/tests/keras/utils/io_utils_test.py0000644000000000116100000001465313354530144021203 0ustar rooteng00000000000000'''Tests for functions in io_utils.py. 
''' import os import pytest from keras.models import Sequential from keras.layers import Dense from keras.utils.io_utils import HDF5Matrix from keras.utils.io_utils import h5dict from keras.utils.io_utils import ask_to_proceed_with_overwrite from numpy.testing import assert_allclose import numpy as np import six import h5py import tempfile try: from unittest.mock import patch except: from mock import patch @pytest.fixture def in_tmpdir(tmpdir): """Runs a function in a temporary directory. Checks that the directory is empty afterwards. """ with tmpdir.as_cwd(): yield None assert not tmpdir.listdir() def create_dataset(h5_path='test.h5'): X = np.random.randn(200, 10).astype('float32') y = np.random.randint(0, 2, size=(200, 1)) with h5py.File(h5_path, 'w') as f: # Creating dataset to store features X_dset = f.create_dataset('my_data', (200, 10), dtype='f') X_dset[:] = X # Creating dataset to store labels y_dset = f.create_dataset('my_labels', (200, 1), dtype='i') y_dset[:] = y def test_io_utils(in_tmpdir): '''Tests the HDF5Matrix code using the sample from @jfsantos at https://gist.github.com/jfsantos/e2ef822c744357a4ed16ec0c885100a3 ''' _, h5_path = tempfile.mkstemp('.h5') create_dataset(h5_path) # Instantiating HDF5Matrix for the training set, # which is a slice of the first 150 elements X_train = HDF5Matrix(h5_path, 'my_data', start=0, end=150) y_train = HDF5Matrix(h5_path, 'my_labels', start=0, end=150) # Likewise for the test set X_test = HDF5Matrix(h5_path, 'my_data', start=150, end=200) y_test = HDF5Matrix(h5_path, 'my_labels', start=150, end=200) # HDF5Matrix behave more or less like Numpy matrices with regards to indexing assert y_train.shape == (150, 1), 'HDF5Matrix shape should match input array' # But they do not support negative indices, so don't try print(X_train[-1]) assert y_train.dtype == np.dtype('i'), ( 'HDF5Matrix dtype should match input array') assert y_train.ndim == 2, 'HDF5Matrix ndim should match input array' assert y_train.size == 150, 
'HDF5Matrix ndim should match input array' model = Sequential() model.add(Dense(64, input_shape=(10,), activation='relu')) model.add(Dense(1, activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer='sgd') # Note: you have to use shuffle='batch' or False with HDF5Matrix model.fit(X_train, y_train, batch_size=32, shuffle='batch', verbose=False) # test that evalutation and prediction don't crash and # return reasonable results out_pred = model.predict(X_test, batch_size=32, verbose=False) out_eval = model.evaluate(X_test, y_test, batch_size=32, verbose=False) assert out_pred.shape == (50, 1), 'Prediction shape does not match' assert out_eval.shape == (), 'Shape of evaluation does not match' assert out_eval > 0, ( 'Evaluation value does not meet criteria: {}'.format(out_eval)) # test slicing for shortened array assert len(X_train[0:]) == len(X_train), 'Incorrect shape for sliced data' # test __getitem__ with pytest.raises(IndexError): X_train[1000] with pytest.raises(IndexError): X_train[1000:1001] with pytest.raises(IndexError): X_train[[1000, 1001]] with pytest.raises(IndexError): X_train[six.moves.range(1000, 1001)] with pytest.raises(IndexError): X_train[np.array([1000])] with pytest.raises(TypeError): X_train[None] assert (X_train[0] == X_train[:1][0]).all() assert (X_train[[0, 1]] == X_train[:2]).all() assert (X_train[np.array([0, 1])] == X_train[:2]).all() # test normalizer normalizer = lambda x: x + 1 normalized_X_train = HDF5Matrix(h5_path, 'my_data', start=0, end=150, normalizer=normalizer) assert np.isclose(normalized_X_train[0][0], X_train[0][0] + 1) # test resizing normalizer normalizer_rs = lambda x: x[:, ::2] normalized_rs_X_train = HDF5Matrix(h5_path, 'my_data', start=0, end=150, normalizer=normalizer_rs) assert (normalized_rs_X_train.shape[1] == 5) # test dtype changing normalizer normalizer_dtype = lambda x: x.astype(np.uint8) normalized_dtype_X_train = HDF5Matrix(h5_path, 'my_data', start=0, end=150, normalizer=normalizer_dtype) 
assert (normalized_dtype_X_train.dtype == np.uint8) os.remove(h5_path) def test_ask_to_proceed_with_overwrite(): with patch('six.moves.input') as mock: mock.return_value = 'y' assert ask_to_proceed_with_overwrite('/tmp/not_exists') mock.return_value = 'n' assert not ask_to_proceed_with_overwrite('/tmp/not_exists') def test_h5dict_attrs(): _, h5_path = tempfile.mkstemp('.h5') # test both HDF5 and dict implementations paths = [h5_path, dict()] for path in paths: f = h5dict(path, mode='w') # str f['x'] = 'abcd' # list f['y'] = [b'efg', b'hij', b'klmn'] # ndarray array = np.random.random((4, 5, 512)) f['z'] = array f.close() f = h5dict(path, mode='r') assert f['x'] == 'abcd' assert f['y'] == [b'efg', b'hij', b'klmn'] assert_allclose(f['z'], array) f.close() os.remove(h5_path) def test_h5dict_groups(): _, h5_path = tempfile.mkstemp('.h5') # test both HDF5 and dict implementations paths = [h5_path, dict()] for path in paths: f = h5dict(path, mode='w') group1 = f['group1'] group2 = group1['group2'] group2['x'] = 'abcd' group3 = group2['group3'] group3['y'] = [b'efg', b'hij', b'klmn'] group4 = group3['group4'] array = np.random.random((4, 5, 512)) group4['z'] = array f.close() f = h5dict(path, mode='r') assert 'group1' in f group1 = f['group1'] assert 'group2' in group1 group2 = group1['group2'] assert group2['x'] == 'abcd' assert 'group3' in group2 group3 = group2['group3'] assert group3['y'] == [b'efg', b'hij', b'klmn'] assert 'group4' in group3 group4 = group3['group4'] assert_allclose(group4['z'], array) f.close() os.remove(h5_path) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/utils/generic_utils_test.py0000644000000000116100000001071313354530144022201 0ustar rooteng00000000000000import sys import pytest import numpy as np import marshal from keras.utils.generic_utils import custom_object_scope from keras.utils.generic_utils import has_arg from keras.utils.generic_utils import Progbar from keras.utils.generic_utils import func_dump from 
keras.utils.generic_utils import func_load from keras import activations from keras import regularizers def test_progbar(): values_s = [None, [['key1', 1], ['key2', 1e-4]], [['key3', 1], ['key2', 1e-4]]] for target in (len(values_s) - 1, None): for verbose in (0, 1, 2): bar = Progbar(target, width=30, verbose=verbose, interval=0.05) for current, values in enumerate(values_s): bar.update(current, values=values) def test_custom_objects_scope(): def custom_fn(): pass class CustomClass(object): pass with custom_object_scope({'CustomClass': CustomClass, 'custom_fn': custom_fn}): act = activations.get('custom_fn') assert act == custom_fn cl = regularizers.get('CustomClass') assert cl.__class__ == CustomClass @pytest.mark.parametrize('fn, name, accept_all, expected', [ ('f(x)', 'x', False, True), ('f(x)', 'y', False, False), ('f(x)', 'y', True, False), ('f(x, y)', 'y', False, True), ('f(x, y=1)', 'y', False, True), ('f(x, **kwargs)', 'x', False, True), ('f(x, **kwargs)', 'y', False, False), ('f(x, **kwargs)', 'y', True, True), ('f(x, y=1, **kwargs)', 'y', False, True), # Keyword-only arguments (Python 3 only) ('f(x, *args, y=1)', 'y', False, True), ('f(x, *args, y=1)', 'z', True, False), ('f(x, *, y=1)', 'x', False, True), ('f(x, *, y=1)', 'y', False, True), # lambda (lambda x: x, 'x', False, True), (lambda x: x, 'y', False, False), (lambda x: x, 'y', True, False), ]) def test_has_arg(fn, name, accept_all, expected): if isinstance(fn, str): context = dict() try: exec('def {}: pass'.format(fn), context) except SyntaxError: if sys.version_info >= (3,): raise pytest.skip('Function is not compatible with Python 2') # Sometimes exec adds builtins to the context context.pop('__builtins__', None) fn, = context.values() assert has_arg(fn, name, accept_all) is expected @pytest.mark.xfail(sys.version_info < (3, 3), reason='inspect API does not reveal positional-only arguments') def test_has_arg_positional_only(): assert has_arg(pow, 'x') is False @pytest.mark.parametrize( 
'test_function_type', ('simple function', 'closured function')) def test_func_dump_and_load(test_function_type): if test_function_type == 'simple function': def test_func(): return r'\u' elif test_function_type == 'closured function': def get_test_func(): x = r'\u' def test_func(): return x return test_func test_func = get_test_func() else: raise Exception('Unknown test case for test_func_dump_and_load') serialized = func_dump(test_func) deserialized = func_load(serialized) assert deserialized.__code__ == test_func.__code__ assert deserialized.__defaults__ == test_func.__defaults__ assert deserialized.__closure__ == test_func.__closure__ def test_func_dump_and_load_closure(): y = 0 test_func = lambda x: x + y serialized, _, closure = func_dump(test_func) deserialized = func_load(serialized, closure=closure) assert deserialized.__code__ == test_func.__code__ assert deserialized.__defaults__ == test_func.__defaults__ assert deserialized.__closure__ == test_func.__closure__ @pytest.mark.parametrize( 'test_func', [activations.softmax, np.argmax, lambda x: x**2, lambda x: x]) def test_func_dump_and_load_backwards_compat(test_func): # this test ensures that models serialized prior to version 2.1.2 can still be # deserialized # see: # https://github.com/evhub/keras/blob/2.1.1/keras/utils/generic_utils.py#L166 serialized = marshal.dumps(test_func.__code__).decode('raw_unicode_escape') deserialized = func_load(serialized, defaults=test_func.__defaults__) assert deserialized.__code__ == test_func.__code__ assert deserialized.__defaults__ == test_func.__defaults__ assert deserialized.__closure__ == test_func.__closure__ if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/utils/np_utils_test.py0000644000000000116100000000236213210317577021207 0ustar rooteng00000000000000"""Tests for functions in np_utils.py. 
""" import numpy as np import pytest from keras.utils import to_categorical def test_to_categorical(): num_classes = 5 shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)] expected_shapes = [(1, num_classes), (3, num_classes), (4, 3, num_classes), (5, 4, 3, num_classes), (3, num_classes), (3, 2, num_classes)] labels = [np.random.randint(0, num_classes, shape) for shape in shapes] one_hots = [to_categorical(label, num_classes) for label in labels] for label, one_hot, expected_shape in zip(labels, one_hots, expected_shapes): # Check shape assert one_hot.shape == expected_shape # Make sure there are only 0s and 1s assert np.array_equal(one_hot, one_hot.astype(bool)) # Make sure there is exactly one 1 in a row assert np.all(one_hot.sum(axis=-1) == 1) # Get original labels back from one hots assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/utils/data_utils_test.py0000644000000000116100000002660713342055016021504 0ustar rooteng00000000000000"""Tests for functions in data_utils.py. 
""" import os import sys import tarfile import threading import zipfile from itertools import cycle import multiprocessing as mp import numpy as np import pytest import six from six.moves.urllib.parse import urljoin from six.moves.urllib.request import pathname2url from keras.utils import GeneratorEnqueuer from keras.utils import OrderedEnqueuer from keras.utils import Sequence from keras.utils.data_utils import _hash_file from keras.utils.data_utils import get_file from keras.utils.data_utils import validate_file from keras import backend as K pytestmark = pytest.mark.skipif( K.backend() == 'tensorflow', reason='Temporarily disabled until the use_multiprocessing problem is solved') if sys.version_info < (3,): def next(x): return x.next() def use_spawn(func): """Decorator to test both Unix (fork) and Windows (spawn)""" @six.wraps(func) def wrapper(*args, **kwargs): out = func(*args, **kwargs) if sys.version_info > (3, 4): mp.set_start_method('spawn', force=True) func(*args, **kwargs) mp.set_start_method('fork', force=True) return out return wrapper @pytest.fixture def in_tmpdir(tmpdir): """Runs a function in a temporary directory. Checks that the directory is empty afterwards. """ with tmpdir.as_cwd(): yield None assert not tmpdir.listdir() def test_data_utils(in_tmpdir): """Tests get_file from a url, plus extraction and validation. 
""" dirname = 'data_utils' with open('test.txt', 'w') as text_file: text_file.write('Float like a butterfly, sting like a bee.') with tarfile.open('test.tar.gz', 'w:gz') as tar_file: tar_file.add('test.txt') with zipfile.ZipFile('test.zip', 'w') as zip_file: zip_file.write('test.txt') origin = urljoin('file://', pathname2url(os.path.abspath('test.tar.gz'))) path = get_file(dirname, origin, untar=True) filepath = path + '.tar.gz' hashval_sha256 = _hash_file(filepath) hashval_md5 = _hash_file(filepath, algorithm='md5') path = get_file(dirname, origin, md5_hash=hashval_md5, untar=True) path = get_file(filepath, origin, file_hash=hashval_sha256, extract=True) assert os.path.exists(filepath) assert validate_file(filepath, hashval_sha256) assert validate_file(filepath, hashval_md5) os.remove(filepath) os.remove('test.tar.gz') origin = urljoin('file://', pathname2url(os.path.abspath('test.zip'))) hashval_sha256 = _hash_file('test.zip') hashval_md5 = _hash_file('test.zip', algorithm='md5') path = get_file(dirname, origin, md5_hash=hashval_md5, extract=True) path = get_file(dirname, origin, file_hash=hashval_sha256, extract=True) assert os.path.exists(path) assert validate_file(path, hashval_sha256) assert validate_file(path, hashval_md5) os.remove(path) os.remove('test.txt') os.remove('test.zip') """Enqueuers Tests""" class threadsafe_iter: """Takes an iterator/generator and makes it thread-safe by serializing call to the `next` method of given iterator/generator. """ def __init__(self, it): self.it = it self.lock = threading.Lock() def __iter__(self): return self def __next__(self): return self.next() def next(self): with self.lock: return next(self.it) def threadsafe_generator(f): """A decorator that takes a generator function and makes it thread-safe. 
""" def g(*a, **kw): return threadsafe_iter(f(*a, **kw)) return g class DummySequence(Sequence): def __init__(self, shape, value=1.0): self.shape = shape self.inner = value def __getitem__(self, item): return np.ones(self.shape, dtype=np.uint32) * item * self.inner def __len__(self): return 100 def on_epoch_end(self): self.inner *= 5.0 class FaultSequence(Sequence): def __getitem__(self, item): raise IndexError(item, 'is not present') def __len__(self): return 100 def on_epoch_end(self): pass @threadsafe_generator def create_generator_from_sequence_threads(ds): for i in cycle(range(len(ds))): yield ds[i] def create_generator_from_sequence_pcs(ds): for i in cycle(range(len(ds))): yield ds[i] def test_generator_enqueuer_threads(): enqueuer = GeneratorEnqueuer(create_generator_from_sequence_threads( DummySequence([3, 200, 200, 3])), use_multiprocessing=False) enqueuer.start(3, 10) gen_output = enqueuer.get() acc = [] for i in range(100): acc.append(int(next(gen_output)[0, 0, 0, 0])) """ Not comparing the order since it is not guaranteed. It may get ordered, but not a lot, one thread can take the GIL before he was supposed to. 
""" assert len(set(acc) - set(range(100))) == 0, "Output is not the same" enqueuer.stop() def test_generator_enqueuer_processes(): enqueuer = GeneratorEnqueuer(create_generator_from_sequence_pcs( DummySequence([3, 200, 200, 3])), use_multiprocessing=True) enqueuer.start(3, 10) gen_output = enqueuer.get() acc = [] for i in range(100): acc.append(int(next(gen_output)[0, 0, 0, 0])) assert acc != list(range(100)), ('Order was keep in GeneratorEnqueuer ' 'with processes') enqueuer.stop() def test_generator_enqueuer_threadsafe(): enqueuer = GeneratorEnqueuer(create_generator_from_sequence_pcs( DummySequence([3, 200, 200, 3])), use_multiprocessing=False) enqueuer.start(3, 10) gen_output = enqueuer.get() with pytest.raises(RuntimeError) as e: [next(gen_output) for _ in range(10)] assert 'thread-safe' in str(e.value) enqueuer.stop() def test_generator_enqueuer_fail_threads(): enqueuer = GeneratorEnqueuer(create_generator_from_sequence_threads( FaultSequence()), use_multiprocessing=False) enqueuer.start(3, 10) gen_output = enqueuer.get() with pytest.raises(IndexError): next(gen_output) def test_generator_enqueuer_fail_processes(): enqueuer = GeneratorEnqueuer(create_generator_from_sequence_pcs( FaultSequence()), use_multiprocessing=True) enqueuer.start(3, 10) gen_output = enqueuer.get() with pytest.raises(IndexError): next(gen_output) def test_ordered_enqueuer_threads(): enqueuer = OrderedEnqueuer(DummySequence([3, 200, 200, 3]), use_multiprocessing=False) enqueuer.start(3, 10) gen_output = enqueuer.get() acc = [] for i in range(100): acc.append(next(gen_output)[0, 0, 0, 0]) assert acc == list(range(100)), ('Order was not keep in GeneratorEnqueuer ' 'with threads') enqueuer.stop() def test_ordered_enqueuer_threads_not_ordered(): enqueuer = OrderedEnqueuer(DummySequence([3, 200, 200, 3]), use_multiprocessing=False, shuffle=True) enqueuer.start(3, 10) gen_output = enqueuer.get() acc = [] for i in range(100): acc.append(next(gen_output)[0, 0, 0, 0]) assert acc != 
list(range(100)), ('Order was not keep in GeneratorEnqueuer ' 'with threads') enqueuer.stop() @use_spawn def test_ordered_enqueuer_processes(): enqueuer = OrderedEnqueuer(DummySequence([3, 200, 200, 3]), use_multiprocessing=True) enqueuer.start(3, 10) gen_output = enqueuer.get() acc = [] for i in range(100): acc.append(next(gen_output)[0, 0, 0, 0]) assert acc == list(range(100)), ('Order was not keep in GeneratorEnqueuer ' 'with processes') enqueuer.stop() def test_ordered_enqueuer_fail_threads(): enqueuer = OrderedEnqueuer(FaultSequence(), use_multiprocessing=False) enqueuer.start(3, 10) gen_output = enqueuer.get() with pytest.raises(IndexError): next(gen_output) @use_spawn def test_on_epoch_end_processes(): enqueuer = OrderedEnqueuer(DummySequence([3, 200, 200, 3]), use_multiprocessing=True) enqueuer.start(3, 10) gen_output = enqueuer.get() acc = [] for i in range(200): acc.append(next(gen_output)[0, 0, 0, 0]) assert acc[100:] == list([k * 5 for k in range(100)]), ( 'Order was not keep in GeneratorEnqueuer with processes') enqueuer.stop() @use_spawn def test_context_switch(): enqueuer = OrderedEnqueuer(DummySequence([3, 200, 200, 3]), use_multiprocessing=True) enqueuer2 = OrderedEnqueuer(DummySequence([3, 200, 200, 3], value=15), use_multiprocessing=True) enqueuer.start(3, 10) enqueuer2.start(3, 10) gen_output = enqueuer.get() gen_output2 = enqueuer2.get() acc = [] for i in range(100): acc.append(next(gen_output)[0, 0, 0, 0]) assert acc[-1] == 99 # One epoch is completed so enqueuer will switch the Sequence acc = [] for i in range(100): acc.append(next(gen_output2)[0, 0, 0, 0]) assert acc[-1] == 99 * 15 # One epoch has been completed so enqueuer2 will switch # Be sure that both Sequence were updated assert next(gen_output)[0, 0, 0, 0] == 0 assert next(gen_output)[0, 0, 0, 0] == 5 assert next(gen_output2)[0, 0, 0, 0] == 0 assert next(gen_output2)[0, 0, 0, 0] == 15 * 5 # Tear down everything enqueuer.stop() enqueuer2.stop() def test_on_epoch_end_threads(): enqueuer 
= OrderedEnqueuer(DummySequence([3, 200, 200, 3]), use_multiprocessing=False) enqueuer.start(3, 10) gen_output = enqueuer.get() acc = [] for i in range(100): acc.append(next(gen_output)[0, 0, 0, 0]) acc = [] for i in range(100): acc.append(next(gen_output)[0, 0, 0, 0]) assert acc == list([k * 5 for k in range(100)]), ( 'Order was not keep in GeneratorEnqueuer with processes') enqueuer.stop() @use_spawn def test_ordered_enqueuer_fail_processes(): enqueuer = OrderedEnqueuer(FaultSequence(), use_multiprocessing=True) enqueuer.start(3, 10) gen_output = enqueuer.get() with pytest.raises(IndexError): next(gen_output) @threadsafe_generator def create_finite_generator_from_sequence_threads(ds): for i in range(len(ds)): yield ds[i] def create_finite_generator_from_sequence_pcs(ds): for i in range(len(ds)): yield ds[i] def test_finite_generator_enqueuer_threads(): enqueuer = GeneratorEnqueuer(create_finite_generator_from_sequence_threads( DummySequence([3, 200, 200, 3])), use_multiprocessing=False) enqueuer.start(3, 10) gen_output = enqueuer.get() acc = [] for output in gen_output: acc.append(int(output[0, 0, 0, 0])) assert set(acc) == set(range(100)), "Output is not the same" enqueuer.stop() def test_finite_generator_enqueuer_processes(): enqueuer = GeneratorEnqueuer(create_finite_generator_from_sequence_pcs( DummySequence([3, 200, 200, 3])), use_multiprocessing=True) enqueuer.start(3, 10) gen_output = enqueuer.get() acc = [] for output in gen_output: acc.append(int(output[0, 0, 0, 0])) assert acc != list(range(100)), ('Order was keep in GeneratorEnqueuer ' 'with processes') enqueuer.stop() if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/utils/layer_utils_test.py0000644000000000116100000000435013354530144021701 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose from keras import backend as K from keras.layers import Conv2D from keras.layers import Dense from keras.layers import Flatten from 
keras.models import Sequential from keras.utils import layer_utils def test_convert_weights(): def get_model(shape, data_format): model = Sequential() model.add(Conv2D(filters=2, kernel_size=(4, 3), input_shape=shape, data_format=data_format)) model.add(Flatten()) model.add(Dense(5)) return model for data_format in ['channels_first', 'channels_last']: if data_format == 'channels_first': shape = (3, 5, 5) target_shape = (5, 5, 3) prev_shape = (2, 3, 2) flip = lambda x: np.flip(np.flip(x, axis=2), axis=3) transpose = lambda x: np.transpose(x, (0, 2, 3, 1)) target_data_format = 'channels_last' elif data_format == 'channels_last': shape = (5, 5, 3) target_shape = (3, 5, 5) prev_shape = (2, 2, 3) flip = lambda x: np.flip(np.flip(x, axis=1), axis=2) transpose = lambda x: np.transpose(x, (0, 3, 1, 2)) target_data_format = 'channels_first' model1 = get_model(shape, data_format) model2 = get_model(target_shape, target_data_format) conv = K.function([model1.input], [model1.layers[0].output]) x = np.random.random((1,) + shape) # Test equivalence of convert_all_kernels_in_model convout1 = conv([x])[0] layer_utils.convert_all_kernels_in_model(model1) convout2 = flip(conv([flip(x)])[0]) assert_allclose(convout1, convout2, atol=1e-5) # Test equivalence of convert_dense_weights_data_format out1 = model1.predict(x) layer_utils.convert_dense_weights_data_format( model1.layers[2], prev_shape, target_data_format) for (src, dst) in zip(model1.layers, model2.layers): dst.set_weights(src.get_weights()) out2 = model2.predict(transpose(x)) assert_allclose(out1, out2, atol=1e-5) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/utils/multi_gpu_test.py0000644000000000116100000002376513354530144021365 0ustar rooteng00000000000000"""These tests are not meant to be run on CI. 
""" from __future__ import print_function import keras from keras import backend as K from keras.utils import multi_gpu_model import numpy as np import pytest import time import tempfile import tensorflow as tf from keras.preprocessing.image import ImageDataGenerator pytestmark = pytest.mark.skipif(K.backend() != 'tensorflow', reason='Requires TF.') if K.backend() == 'tensorflow': available_devices = keras.utils.multi_gpu_utils._get_available_devices() available_devices = [keras.utils.multi_gpu_utils._normalize_device_name(name) for name in available_devices] pytestmark = pytest.mark.skipif('/gpu:7' not in available_devices, reason='Requires 8 GPUs.') def test_multi_gpu_simple_model(): print('####### test simple model') num_samples = 1000 input_dim = 10 output_dim = 1 hidden_dim = 10 gpus = 8 target_gpu_id = [0, 2, 4] epochs = 2 model = keras.models.Sequential() model.add(keras.layers.Dense(hidden_dim, input_shape=(input_dim,))) model.add(keras.layers.Dense(output_dim)) x = np.random.random((num_samples, input_dim)) y = np.random.random((num_samples, output_dim)) parallel_model = multi_gpu_model(model, gpus=gpus) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit(x, y, epochs=epochs) parallel_model = multi_gpu_model(model, gpus=target_gpu_id) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit(x, y, epochs=epochs) def test_multi_gpu_multi_io_model(): print('####### test multi-io model') num_samples = 1000 input_dim_a = 10 input_dim_b = 5 output_dim_a = 1 output_dim_b = 2 hidden_dim = 10 gpus = 8 target_gpu_id = [0, 2, 4] epochs = 2 input_a = keras.Input((input_dim_a,)) input_b = keras.Input((input_dim_b,)) a = keras.layers.Dense(hidden_dim)(input_a) b = keras.layers.Dense(hidden_dim)(input_b) c = keras.layers.concatenate([a, b]) output_a = keras.layers.Dense(output_dim_a)(c) output_b = keras.layers.Dense(output_dim_b)(c) model = keras.models.Model([input_a, input_b], [output_a, output_b]) a_x = 
np.random.random((num_samples, input_dim_a)) b_x = np.random.random((num_samples, input_dim_b)) a_y = np.random.random((num_samples, output_dim_a)) b_y = np.random.random((num_samples, output_dim_b)) parallel_model = multi_gpu_model(model, gpus=gpus) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs) parallel_model = multi_gpu_model(model, gpus=target_gpu_id) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs) def test_multi_gpu_invalid_devices(): input_shape = (1000, 10) model = keras.models.Sequential() model.add(keras.layers.Dense(10, activation='relu', input_shape=input_shape[1:])) model.add(keras.layers.Dense(1, activation='sigmoid')) x = np.random.random(input_shape) y = np.random.random((input_shape[0], 1)) with pytest.raises(ValueError): parallel_model = multi_gpu_model(model, gpus=10) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit(x, y, epochs=2) with pytest.raises(ValueError): parallel_model = multi_gpu_model(model, gpus=[0, 2, 4, 6, 8]) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit(x, y, epochs=2) with pytest.raises(ValueError): parallel_model = multi_gpu_model(model, gpus=1) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit(x, y, epochs=2) with pytest.raises(ValueError): parallel_model = multi_gpu_model(model, gpus=[0]) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit(x, y, epochs=2) def test_serialization(): model = keras.models.Sequential() model.add(keras.layers.Dense(3, input_shape=(4,))) model.add(keras.layers.Dense(4)) x = np.random.random((100, 4)) y = np.random.random((100, 4)) parallel_model = multi_gpu_model(model, gpus=2) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit(x, y, epochs=1) ref_output = parallel_model.predict(x) _, fname = tempfile.mkstemp('.h5') parallel_model.save(fname) 
K.clear_session() parallel_model = keras.models.load_model(fname) output = parallel_model.predict(x) np.testing.assert_allclose(ref_output, output, atol=1e-5) def multi_gpu_application_np_array_benchmark(): print('####### Xception benchmark - np i/o') model_cls = keras.applications.Xception num_samples = 1000 height = 224 width = 224 num_classes = 1000 epochs = 4 batch_size = 40 x = np.random.random((num_samples, height, width, 3)) y = np.random.random((num_samples, num_classes)) # Baseline model = model_cls(weights=None, input_shape=(height, width, 3), classes=num_classes) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') # Training start_time = time.time() model.fit(x, y, epochs=epochs) total_time = time.time() - start_time print('baseline training:', total_time) # Inference start_time = time.time() model.predict(x) total_time = time.time() - start_time print('baseline inference:', total_time) for i in range(2, 9, 2): K.clear_session() with tf.device('/cpu:0'): model = model_cls(weights=None, input_shape=(height, width, 3), classes=num_classes) parallel_model = multi_gpu_model(model, gpus=i) parallel_model.compile(loss='categorical_crossentropy', optimizer='rmsprop') start_time = time.time() parallel_model.fit(x, y, epochs=epochs, batch_size=batch_size) total_time = time.time() - start_time print('%d gpus training:' % i, total_time) # Inference start_time = time.time() parallel_model.predict(x, batch_size=batch_size) total_time = time.time() - start_time print('%d gpus inference:' % i, total_time) def multi_gpu_application_folder_generator_benchmark(): """Before running this test: wget https://s3.amazonaws.com/img-datasets/cats_and_dogs_small.zip unzip cats_and_dogs_small.zip """ print('####### Xception benchmark - folder generator i/o') model_cls = keras.applications.Xception height = 150 width = 150 num_classes = 2 epochs = 3 steps_per_epoch = 100 batch_size = 64 # Baseline model = model_cls(weights=None, input_shape=(height, width, 3), 
classes=num_classes) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') datagen = ImageDataGenerator( rotation_range=40, width_shift_range=0.2, height_shift_range=0.2, shear_range=0.2, zoom_range=0.2, horizontal_flip=True, fill_mode='nearest') train_dir = '/home/ubuntu/cats_and_dogs_small/train' # Change this train_gen = datagen.flow_from_directory( train_dir, target_size=(height, width), batch_size=batch_size, class_mode='categorical') # Training start_time = time.time() model.fit_generator(train_gen, steps_per_epoch=steps_per_epoch, epochs=epochs, workers=4) total_time = time.time() - start_time print('baseline training:', total_time) for i in range(2, 9): K.clear_session() with tf.device('/cpu:0'): model = model_cls(weights=None, input_shape=(height, width, 3), classes=num_classes) parallel_model = multi_gpu_model(model, gpus=i) parallel_model.compile(loss='categorical_crossentropy', optimizer='rmsprop') train_gen = datagen.flow_from_directory( train_dir, target_size=(height, width), batch_size=batch_size, class_mode='categorical') start_time = time.time() parallel_model.fit_generator( train_gen, steps_per_epoch=steps_per_epoch, epochs=epochs, workers=4 * i) total_time = time.time() - start_time print('%d gpus training:' % i, total_time) def test_multi_gpu_with_multi_input_layers(): inputs = keras.Input((4, 3)) init_state = keras.Input((3,)) outputs = keras.layers.SimpleRNN( 3, return_sequences=True)(inputs, initial_state=init_state) x = [np.random.randn(2, 4, 3), np.random.randn(2, 3)] y = np.random.randn(2, 4, 3) model = keras.models.Model([inputs, init_state], outputs) parallel_model = multi_gpu_model(model, 2) parallel_model.compile(loss='mean_squared_error', optimizer='adam') parallel_model.train_on_batch(x, y) def test_multi_gpu_with_siamese(): input_shape = (3,) nested_model = keras.models.Sequential([ keras.layers.Dense(32, input_shape=input_shape), keras.layers.Dense(1) ], name='nested') input1 = keras.Input(input_shape) input2 = 
keras.Input(input_shape) score1 = nested_model(input1) score2 = nested_model(input2) score_sum = keras.layers.Add(name='add')([score1, score2]) siamese = keras.models.Model(inputs=[input1, input2], outputs=[score_sum, score1, score2], name='siamese') parallel_siamese = multi_gpu_model(siamese, 2) assert parallel_siamese.output_names == ['add', 'nested_1', 'nested_2'] if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/utils/vis_utils_test.py0000644000000000116100000000163213342055016021363 0ustar rooteng00000000000000import pytest import os import sys import numpy as np from keras.layers import Conv2D from keras.layers import Dense from keras.layers import Flatten from keras.layers import LSTM from keras.layers import TimeDistributed from keras.models import Sequential from keras.utils import vis_utils def test_plot_model(): model = Sequential() model.add(Conv2D(2, kernel_size=(2, 3), input_shape=(3, 5, 5), name='conv')) model.add(Flatten(name='flat')) model.add(Dense(5, name='dense1')) vis_utils.plot_model(model, to_file='model1.png', show_layer_names=False) os.remove('model1.png') model = Sequential() model.add(LSTM(16, return_sequences=True, input_shape=(2, 3), name='lstm')) model.add(TimeDistributed(Dense(5, name='dense2'))) vis_utils.plot_model(model, to_file='model2.png', show_shapes=True) os.remove('model2.png') if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/utils/conv_utils_test.py0000644000000000116100000000650413326715636021547 0ustar rooteng00000000000000import pytest import numpy as np from keras.utils import conv_utils from keras import backend as K def test_normalize_tuple(): assert conv_utils.normalize_tuple(5, 2, 'kernel_size') == (5, 5) assert conv_utils.normalize_tuple([7, 9], 2, 'kernel_size') == (7, 9) with pytest.raises(ValueError): conv_utils.normalize_tuple(None, 2, 'kernel_size') with pytest.raises(ValueError): conv_utils.normalize_tuple([2, 3, 4], 2, 'kernel_size') with 
pytest.raises(ValueError): conv_utils.normalize_tuple(['str', 'impossible'], 2, 'kernel_size') def test_invalid_data_format(): with pytest.raises(ValueError): K.normalize_data_format('channels_middle') def test_invalid_padding(): with pytest.raises(ValueError): conv_utils.normalize_padding('diagonal') def test_invalid_convert_kernel(): with pytest.raises(ValueError): conv_utils.convert_kernel(np.zeros((10, 20))) def test_conv_output_length(): assert conv_utils.conv_output_length(None, 7, 'same', 1) is None assert conv_utils.conv_output_length(224, 7, 'same', 1) == 224 assert conv_utils.conv_output_length(224, 7, 'same', 2) == 112 assert conv_utils.conv_output_length(32, 5, 'valid', 1) == 28 assert conv_utils.conv_output_length(32, 5, 'valid', 2) == 14 assert conv_utils.conv_output_length(32, 5, 'causal', 1) == 32 assert conv_utils.conv_output_length(32, 5, 'causal', 2) == 16 assert conv_utils.conv_output_length(32, 5, 'full', 1) == 36 assert conv_utils.conv_output_length(32, 5, 'full', 2) == 18 with pytest.raises(AssertionError): conv_utils.conv_output_length(32, 5, 'diagonal', 2) def test_conv_input_length(): assert conv_utils.conv_input_length(None, 7, 'same', 1) is None assert conv_utils.conv_input_length(112, 7, 'same', 1) == 112 assert conv_utils.conv_input_length(112, 7, 'same', 2) == 223 assert conv_utils.conv_input_length(28, 5, 'valid', 1) == 32 assert conv_utils.conv_input_length(14, 5, 'valid', 2) == 31 assert conv_utils.conv_input_length(36, 5, 'full', 1) == 32 assert conv_utils.conv_input_length(18, 5, 'full', 2) == 31 with pytest.raises(AssertionError): conv_utils.conv_output_length(18, 5, 'diagonal', 2) def test_deconv_length(): assert conv_utils.deconv_length(None, 1, 7, 'same', None) is None assert conv_utils.deconv_length(224, 1, 7, 'same', None) == 224 assert conv_utils.deconv_length(224, 2, 7, 'same', None) == 448 assert conv_utils.deconv_length(32, 1, 5, 'valid', None) == 36 assert conv_utils.deconv_length(32, 2, 5, 'valid', None) == 67 assert 
conv_utils.deconv_length(32, 1, 5, 'full', None) == 28 assert conv_utils.deconv_length(32, 2, 5, 'full', None) == 59 assert conv_utils.deconv_length(224, 1, 7, 'same', 0) == 224 assert conv_utils.deconv_length(224, 2, 7, 'same', 0) == 447 assert conv_utils.deconv_length(224, 2, 7, 'same', 1) == 448 assert conv_utils.deconv_length(32, 1, 5, 'valid', 0) == 36 assert conv_utils.deconv_length(32, 2, 5, 'valid', 0) == 67 assert conv_utils.deconv_length(32, 2, 5, 'valid', 1) == 68 assert conv_utils.deconv_length(6, 1, 3, 'full', 0) == 4 assert conv_utils.deconv_length(6, 2, 3, 'full', 1) == 10 assert conv_utils.deconv_length(6, 2, 3, 'full', 2) == 11 if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/backend/0000755000000000116100000000000013355226624016167 5ustar rooteng00000000000000Keras-2.2.4/tests/keras/backend/backend_test.py0000644000000000116100000024642113355226611021174 0ustar rooteng00000000000000import pytest from numpy.testing import assert_allclose import numpy as np import scipy.sparse as sparse import warnings from keras import backend as K from keras.backend import floatx, set_floatx, variable from keras.utils.conv_utils import convert_kernel import reference_operations as KNP BACKENDS = [] # Holds a list of all available back-ends try: from keras.backend import cntk_backend as KC BACKENDS.append(KC) except ImportError: KC = None warnings.warn('Could not import the CNTK backend') try: from keras.backend import tensorflow_backend as KTF BACKENDS.append(KTF) except ImportError: KTF = None warnings.warn('Could not import the TensorFlow backend.') try: from keras.backend import theano_backend as KTH BACKENDS.append(KTH) except ImportError: KTH = None warnings.warn('Could not import the Theano backend') WITH_NP = [KTH if K.backend() == 'theano' else KC if K.backend() == 'cntk' else KTF, KNP] def check_dtype(var, dtype): if K._BACKEND == 'theano': assert var.dtype == dtype else: assert var.dtype.name == '%s_ref' % dtype def 
cntk_func_tensors(function_name, shapes_or_vals, **kwargs): placeholders = [] variables = [] for shape_or_val in shapes_or_vals: if isinstance(shape_or_val, tuple): shape = shape_or_val placeholders.append(KC.placeholder(shape)) else: value = shape_or_val variables.append(KC.variable(value)) output_cntk = getattr(KC, function_name)(*(placeholders + variables), **kwargs) cntk_func = KC.function(placeholders, [output_cntk]) return output_cntk, cntk_func def parse_shape_or_val(shape_or_val): if isinstance(shape_or_val, np.ndarray): return shape_or_val.shape, shape_or_val else: return shape_or_val, np.random.random(shape_or_val).astype(np.float32) - 0.5 def assert_list_pairwise(z_list, shape=True, allclose=True, itself=False, atol=1e-05): for (z1, z2) in zip(z_list[1:], z_list[:-1]): if shape: assert z1.shape == z2.shape if allclose: assert_allclose(z1, z2, atol=atol) if itself: assert z1 == z2 def assert_list_keras_shape(t_list, z_list): for t, z in zip(t_list, z_list): if hasattr(t, '_keras_shape') and len(t._keras_shape) > 1: for i, s in enumerate(t._keras_shape): if s: assert t._keras_shape[i] == z.shape[i] def check_single_tensor_operation(function_name, x_shape_or_val, backend_list, **kwargs): shape_or_val = kwargs.pop('shape_or_val', True) assert_value_equality = kwargs.pop('assert_value_equality', True) cntk_dynamicity = kwargs.pop('cntk_dynamicity', False) if shape_or_val: x_shape, x_val = parse_shape_or_val(x_shape_or_val) t_list = [] z_list = [] for k in backend_list: if shape_or_val: if (k == KC) & (cntk_dynamicity): t, f = cntk_func_tensors(function_name, [x_shape], **kwargs) z = f([x_val])[0] else: t = getattr(k, function_name)(k.variable(x_val), **kwargs) z = k.eval(t) else: t = getattr(k, function_name)(x_shape_or_val, **kwargs) z = k.eval(t) t_list += [t] z_list += [z] assert_list_pairwise(z_list, allclose=assert_value_equality) assert_list_keras_shape(t_list, z_list) def check_two_tensor_operation(function_name, x_shape_or_val, y_shape_or_val, 
backend_list, **kwargs): concat_args = kwargs.pop('concat_args', False) cntk_dynamicity = kwargs.pop('cntk_dynamicity', False) cntk_two_dynamicity = kwargs.pop('cntk_two_dynamicity', False) x_shape, x_val = parse_shape_or_val(x_shape_or_val) y_shape, y_val = parse_shape_or_val(y_shape_or_val) t_list = [] z_list = [] for k in backend_list: if (k == KC) & (cntk_dynamicity): t, f = cntk_func_tensors(function_name, [x_shape, y_val], **kwargs) z = f([x_val])[0] elif (k == KC) & (cntk_two_dynamicity): t, f = cntk_func_tensors(function_name, [x_shape, y_shape], **kwargs) z = f([x_val, y_val])[0] elif (k == KTH) & (function_name[:4] == 'conv'): t = getattr(k, function_name)( k.variable(x_val), k.variable(convert_kernel(y_val)), **kwargs) z = k.eval(t) elif concat_args: t = getattr(k, function_name)( [k.variable(x_val), k.variable(y_val)], **kwargs) z = k.eval(t) else: t = getattr(k, function_name)( k.variable(x_val), k.variable(y_val), **kwargs) z = k.eval(t) t_list += [t] z_list += [z] assert_list_pairwise(z_list) assert_list_keras_shape(t_list, z_list) def check_composed_tensor_operations(first_function_name, first_function_args, second_function_name, second_function_args, input_shape, backend_list): val = np.random.random(input_shape) - 0.5 z_list = [] for k in backend_list: x = k.variable(val) y = getattr(k, first_function_name)(x, **first_function_args) z = k.eval(getattr(k, second_function_name)(y, **second_function_args)) z_list += [z] assert_list_pairwise(z_list) class TestBackend(object): def test_is_keras_tensor(self): np_var = np.array([1, 2]) with pytest.raises(ValueError): K.is_keras_tensor(np_var) keras_var = K.variable(np_var) assert K.is_keras_tensor(keras_var) is False keras_placeholder = K.placeholder(shape=(2, 4, 5)) assert K.is_keras_tensor(keras_placeholder) is False def test_set_learning_phase(self): # not supported learning_phase with pytest.raises(ValueError): K.set_learning_phase(2) def test_eye(self): check_single_tensor_operation('eye', 3, 
WITH_NP, shape_or_val=False) def test_ones(self): check_single_tensor_operation('ones', (3, 5, 10, 8), WITH_NP, shape_or_val=False) def test_zeros(self): check_single_tensor_operation('zeros', (3, 5, 10, 8), WITH_NP, shape_or_val=False) def test_ones_like(self): check_single_tensor_operation('ones_like', (3, 5, 10, 8), WITH_NP, shape_or_val=True) def test_zeros_like(self): check_single_tensor_operation('zeros_like', (3, 5, 10, 8), WITH_NP, shape_or_val=True) def test_linear_operations(self): check_two_tensor_operation('dot', (4, 2), (2, 4), WITH_NP) check_two_tensor_operation('dot', (4, 2), (5, 2, 3), WITH_NP) check_two_tensor_operation('batch_dot', (4, 2, 3), (4, 5, 3), BACKENDS, cntk_two_dynamicity=True, axes=(2, 2)) check_two_tensor_operation('batch_dot', (4, 2, 3), (4, 3), BACKENDS, cntk_two_dynamicity=True, axes=(2, 1)) check_two_tensor_operation('batch_dot', (4, 2), (4, 2, 3), BACKENDS, cntk_two_dynamicity=True, axes=(1, 1)) check_two_tensor_operation('batch_dot', (32, 20), (32, 20), BACKENDS, cntk_two_dynamicity=True, axes=1) check_two_tensor_operation('batch_dot', (32, 20), (32, 20), BACKENDS, cntk_two_dynamicity=True, axes=(1, 1)) check_single_tensor_operation('transpose', (4, 2), WITH_NP) check_single_tensor_operation('reverse', (4, 3, 2), WITH_NP, axes=1) if K.backend() != 'cntk': check_single_tensor_operation('reverse', (4, 3, 2), WITH_NP, axes=(1, 2)) def test_random_variables(self): check_single_tensor_operation('random_uniform_variable', (2, 3), WITH_NP, low=0., high=1., shape_or_val=False, assert_value_equality=False) check_single_tensor_operation('random_normal_variable', (2, 3), WITH_NP, mean=0., scale=1., shape_or_val=False, assert_value_equality=False) @pytest.mark.skipif(K.backend() != 'tensorflow', reason='Not supported.') def test_batch_dot_shape(self): x_batch = K.ones(shape=(32, 20)) y_batch = K.ones(shape=(32, 20)) xy_batch_dot = K.batch_dot(x_batch, y_batch, axes=1) assert_allclose(K.eval(xy_batch_dot), np.ones((32, 1)) * 20, atol=1e-05) 
xy_batch_dot = K.batch_dot(x_batch, y_batch, axes=0) assert_allclose(K.eval(xy_batch_dot), np.ones((20, 1)) * 32, atol=1e-05) # making sure swapping axes when ndim == 2 works x_batch = K.ones(shape=(32, 20)) y_batch = K.ones(shape=(20, 32)) xy_batch_dot = K.batch_dot(x_batch, y_batch, axes=(0, 1)) assert_allclose(K.eval(xy_batch_dot), np.ones((20, 1)) * 32, atol=1e-05) xy_batch_dot = K.batch_dot(x_batch, y_batch, axes=(1, 0)) assert_allclose(K.eval(xy_batch_dot), np.ones((32, 1)) * 20, atol=1e-05) def test_shape_operations(self): check_two_tensor_operation('concatenate', (4, 3), (4, 2), WITH_NP, axis=-1, concat_args=True) check_single_tensor_operation('reshape', (4, 2), WITH_NP, shape=(8, 1)) check_single_tensor_operation('permute_dimensions', (4, 2, 3), WITH_NP, pattern=(2, 0, 1)) check_single_tensor_operation('repeat', (4, 1), WITH_NP, n=3) check_single_tensor_operation('flatten', (4, 1), WITH_NP) check_single_tensor_operation('batch_flatten', (20, 2, 5), WITH_NP, cntk_dynamicity=True) check_single_tensor_operation('expand_dims', (4, 3), WITH_NP, axis=-1) check_single_tensor_operation('expand_dims', (4, 3, 2), WITH_NP, axis=1) check_single_tensor_operation('squeeze', (4, 3, 1), WITH_NP, axis=2) check_single_tensor_operation('squeeze', (4, 1, 1), WITH_NP, axis=1) check_composed_tensor_operations('reshape', {'shape': (4, 3, 1, 1)}, 'squeeze', {'axis': 2}, (4, 3, 1, 1), WITH_NP) @pytest.mark.skipif(K.backend() != 'theano', reason='We only test the shape inference of the ' 'theano backend.') def test_none_shape_operations(self): # Test shape inference when input # shape has `None` entries x = K.placeholder((3, None, 4)) y = K.batch_flatten(x) if hasattr(y, '_keras_shape'): assert y._keras_shape == (3, None) y = K.flatten(x) if hasattr(y, '_keras_shape'): assert y._keras_shape == (None, ) def test_repeat_elements(self): reps = 3 for ndims in [1, 2, 3]: shape = np.arange(2, 2 + ndims) arr = np.arange(np.prod(shape)).reshape(shape) for rep_axis in range(ndims): 
check_single_tensor_operation('repeat_elements', arr, WITH_NP, rep=reps, axis=rep_axis) if K.backend() != 'cntk': shape = list(shape) shape[rep_axis] = None x = K.placeholder(shape=shape) y = K.repeat_elements(x, reps, axis=rep_axis) assert y._keras_shape == tuple(shape) assert y._keras_shape == K.int_shape(y) def test_tile(self): shape = (3, 4) arr = np.arange(np.prod(shape)).reshape(shape) check_single_tensor_operation('tile', arr, WITH_NP, n=[2, 1]) check_single_tensor_operation('tile', (2, 5), WITH_NP, n=[5, 2]) # test theano shape inference when # input shape has None entries if K.backend() == 'theano': x = K.placeholder(shape=(None, 4)) n = 2 y = K.tile(x, n) assert y._keras_shape == (None, 8) n = (4, 3) y = K.tile(x, n) assert y._keras_shape == (None, 12) def test_gather(self): shape = (10, 2, 3) ref = np.arange(np.prod(shape)).reshape(shape) inds = [1, 3, 7, 9] t_list = [k.gather(k.variable(ref), k.variable(inds, dtype='int32')) for k in BACKENDS] z_list = [k.eval(k.gather(k.variable(ref), k.variable(inds, dtype='int32'))) for k in BACKENDS] assert_list_pairwise(z_list) assert_list_keras_shape(t_list, z_list) # test theano shape inference when # input shape has None entries if K.backend() == 'theano': x = K.placeholder(shape=(None, 3, 4)) indices = K.placeholder(shape=(5, 6), dtype='int32') y = K.gather(x, indices) assert y._keras_shape == (5, 6, 3, 4) def test_value_manipulation(self): val = np.random.random((4, 2)) for function_name in ['get_value', 'count_params', 'int_shape', 'get_variable_shape']: v_list = [getattr(k, function_name)(k.variable(val)) for k in BACKENDS] if function_name == 'get_value': assert_list_pairwise(v_list) else: assert_list_pairwise(v_list, shape=False, allclose=False, itself=True) # print_tensor check_single_tensor_operation('print_tensor', (), WITH_NP) check_single_tensor_operation('print_tensor', (2,), WITH_NP) check_single_tensor_operation('print_tensor', (4, 3), WITH_NP) check_single_tensor_operation('print_tensor', (1, 2, 
3), WITH_NP) def test_elementwise_operations(self): check_single_tensor_operation('max', (4, 2), WITH_NP) check_single_tensor_operation('max', (4, 2), WITH_NP, axis=1, keepdims=True) check_single_tensor_operation('max', (4, 2, 3), WITH_NP, axis=[1, -1]) check_single_tensor_operation('min', (4, 2), WITH_NP) check_single_tensor_operation('min', (4, 2), WITH_NP, axis=1, keepdims=True) check_single_tensor_operation('min', (4, 2, 3), WITH_NP, axis=[1, -1]) check_single_tensor_operation('mean', (4, 2), WITH_NP) check_single_tensor_operation('mean', (4, 2), WITH_NP, axis=1, keepdims=True) check_single_tensor_operation('mean', (4, 2, 3), WITH_NP, axis=-1, keepdims=True) check_single_tensor_operation('mean', (4, 2, 3), WITH_NP, axis=[1, -1]) check_single_tensor_operation('var', (4, 2), WITH_NP) check_single_tensor_operation('var', (4, 2), WITH_NP, axis=1, keepdims=True) check_single_tensor_operation('var', (4, 2, 3), WITH_NP, axis=[1, -1]) check_single_tensor_operation('std', (4, 2), WITH_NP) check_single_tensor_operation('std', (4, 2), WITH_NP, axis=1, keepdims=True) check_single_tensor_operation('std', (4, 2, 3), WITH_NP, axis=[1, -1]) # check_single_tensor_operation('std', (4, 2, 3), BACKENDS, axis=[1, -1]) check_single_tensor_operation('logsumexp', (4, 2), WITH_NP) check_single_tensor_operation('logsumexp', (4, 2), WITH_NP, axis=1, keepdims=True) check_single_tensor_operation('logsumexp', (4, 2, 3), WITH_NP, axis=[1, -1]) check_single_tensor_operation('prod', (4, 2), WITH_NP) check_single_tensor_operation('prod', (4, 2), WITH_NP, axis=1, keepdims=True) check_single_tensor_operation('prod', (4, 2, 3), WITH_NP, axis=[1, -1]) check_single_tensor_operation('any', (4, 2), WITH_NP) check_single_tensor_operation('any', (4, 2), WITH_NP, axis=1, keepdims=True) check_single_tensor_operation('any', (4, 2, 3), WITH_NP, axis=[1, -1]) check_single_tensor_operation('all', (4, 2), WITH_NP) check_single_tensor_operation('all', (4, 2), WITH_NP, axis=1, keepdims=True) 
check_single_tensor_operation('all', (4, 2, 3), WITH_NP, axis=[1, -1]) check_single_tensor_operation('argmax', (4, 2), WITH_NP) check_single_tensor_operation('argmax', (4, 2), WITH_NP, axis=1) check_single_tensor_operation('argmin', (4, 2), WITH_NP) check_single_tensor_operation('argmin', (4, 2), WITH_NP, axis=1) check_single_tensor_operation('square', (4, 2), WITH_NP) check_single_tensor_operation('abs', (4, 2), WITH_NP) check_single_tensor_operation('sqrt', (4, 2), WITH_NP) check_single_tensor_operation('exp', (4, 2), WITH_NP) check_single_tensor_operation('round', (4, 2), WITH_NP) check_single_tensor_operation('sign', (4, 2), WITH_NP) check_single_tensor_operation('pow', (4, 2), WITH_NP, a=3) check_single_tensor_operation('clip', (4, 2), WITH_NP, min_value=0.4, max_value=0.6) check_single_tensor_operation('cos', (4, 2), WITH_NP) check_single_tensor_operation('sin', (4, 2), WITH_NP) # two-tensor ops check_two_tensor_operation('equal', (4, 2), (4, 2), WITH_NP) check_two_tensor_operation('not_equal', (4, 2), (4, 2), WITH_NP) check_two_tensor_operation('greater', (4, 2), (4, 2), WITH_NP) check_two_tensor_operation('greater_equal', (4, 2), (4, 2), WITH_NP) check_two_tensor_operation('less', (4, 2), (4, 2), WITH_NP) check_two_tensor_operation('less_equal', (4, 2), (4, 2), WITH_NP) check_two_tensor_operation('maximum', (4, 2), (4, 2), WITH_NP) check_two_tensor_operation('minimum', (4, 2), (4, 2), WITH_NP) @pytest.mark.skipif(K.backend() == 'cntk', reason='cntk does not support ' 'cumsum and cumprod yet') def test_cumsum_cumprod(self): check_single_tensor_operation('cumsum', (4, 2), WITH_NP) check_single_tensor_operation('cumsum', (4, 2), WITH_NP, axis=1) check_single_tensor_operation('cumprod', (4, 2), WITH_NP) check_single_tensor_operation('cumprod', (4, 2), WITH_NP, axis=1) @pytest.mark.skipif(K.backend() == 'cntk', reason='cntk return -85.1 for zero or ' 'negative number, not nan, so can\'t ' 'compare with other backend.') def test_log(self): 
check_single_tensor_operation('log', (4, 2), WITH_NP) @pytest.mark.skipif(K.backend() == 'cntk', reason='cntk doesn\'t support gradient in this way.') def test_gradient(self): val = np.random.random((4, 2)) x_list = [k.variable(val) for k in [KTH, KTF]] z_list = [] zero_list = [] for x, k in zip(x_list, [KTH, KTF]): exp = x * k.exp(x) loss = k.sum(exp) zero_loss = k.stop_gradient(loss) grad = k.gradients(loss, [exp]) zero_grad = k.gradients(loss + zero_loss, [exp]) z_list.append(k.eval(grad[0])) zero_list.append(k.eval(zero_grad[0])) assert_list_pairwise(z_list) assert_list_pairwise(zero_list) for i in range(len(z_list)): assert_allclose(zero_list[i], z_list[i], atol=1e-05) def test_stop_gradient(self): # This test checks the consistency of the stop_gradient backend API. # It doesn't check the functionality (which is checked at the # test_gradient test). val = np.random.random((4, 2)) a = K.variable(val) b = K.square(a) c, d = K.stop_gradient([a, b]) e = K.stop_gradient(b) @pytest.mark.skipif(K.backend() == 'cntk', reason='cntk currently not support function in this ' 'way, so can\'t test as this.') def test_function(self): test_backend = [KTH, KTF] val = np.random.random((4, 2)) input_val = np.random.random((4, 2)) f_list = [] x_list = [] for k in test_backend: x = k.variable(val) x_list.append(x) y = k.placeholder(ndim=2) exp = k.square(x) + y update = x * 2 f = k.function([y], [exp], updates=[(x, update)]) f_list.append(f) function_outputs_list = [f([input_val])[0] for f in f_list] assert_list_pairwise(function_outputs_list) new_val_list = [k.get_value(x) for x, k in zip(x_list, test_backend)] assert_list_pairwise(new_val_list) @pytest.mark.skipif(K.backend() != 'tensorflow', reason='Uses the `fetches` argument.') def test_function_tf_fetches(self): # Additional operations can be passed to tf.Session().run() via its # `fetches` arguments. 
In contrast to `updates` argument of # KTF.function() these do not have control dependency on `outputs`, so # they can run in parallel. Also they should not contribute to output of # KTF.function(). x = K.variable(0.) y = K.variable(0.) x_placeholder = K.placeholder(shape=()) y_placeholder = K.placeholder(shape=()) f = K.function(inputs=[x_placeholder, y_placeholder], outputs=[x_placeholder + y_placeholder], updates=[(x, x_placeholder + 1.)], fetches=[K.update(y, 5.)]) output = f([10., 20.]) assert output == [30.] assert K.get_session().run(fetches=[x, y]) == [11., 5.] @pytest.mark.skipif(K.backend() != 'tensorflow', reason='Uses the `feed_dict` argument.') def test_function_tf_feed_dict(self): # Additional substitutions can be passed to `tf.Session().run()` via its # `feed_dict` arguments. Note that the feed_dict is passed once in the # constructor but we can modify the values in the dictionary. Through # this feed_dict we can provide additional substitutions besides Keras # inputs. x = K.variable(0.) y = K.variable(0.) x_placeholder = K.placeholder(shape=()) y_placeholder = K.placeholder(shape=()) feed_dict = {y_placeholder: 3.} f = K.function(inputs=[x_placeholder], outputs=[x_placeholder + 1.], updates=[(x, x_placeholder + 10.)], feed_dict=feed_dict, fetches=[K.update(y, y_placeholder * 10.)]) output = f([10.]) assert output == [11.] assert K.get_session().run(fetches=[x, y]) == [20., 30.] # updated value in feed_dict will be modified within the K.function() feed_dict[y_placeholder] = 4. output = f([20.]) assert output == [21.] assert K.get_session().run(fetches=[x, y]) == [30., 40.] 
@pytest.mark.skipif(K.backend() != 'tensorflow', reason='Uses the `options` and `run_metadata` arguments.') def test_function_tf_run_options_with_run_metadata(self): from tensorflow.core.protobuf import config_pb2 x_placeholder = K.placeholder(shape=()) y_placeholder = K.placeholder(shape=()) run_options = config_pb2.RunOptions(output_partition_graphs=True) run_metadata = config_pb2.RunMetadata() # enable run_options. f = K.function(inputs=[x_placeholder, y_placeholder], outputs=[x_placeholder + y_placeholder], options=run_options, run_metadata=run_metadata) output = f([10., 20.]) assert output == [30.] assert len(run_metadata.partition_graphs) > 0 # disable run_options. f = K.function(inputs=[x_placeholder, y_placeholder], outputs=[x_placeholder + y_placeholder], run_metadata=run_metadata) output = f([10., 20.]) assert output == [30.] assert len(run_metadata.partition_graphs) == 0 @pytest.mark.skipif(K.backend() != 'tensorflow', reason='Uses the `string` type for a tensor.') def test_function_tf_string_input(self): # Test functions with string inputs. 
x_placeholder = K.placeholder(shape=(), dtype="string") x_identity = K.identity(x_placeholder) f = K.function(inputs=[x_placeholder], outputs=[x_identity]) output = f([b'test']) assert output == [b'test'] def test_rnn(self): # implement a simple RNN num_samples = 4 input_dim = 5 output_dim = 3 timesteps = 6 _, x = parse_shape_or_val((num_samples, timesteps, input_dim)) _, h0 = parse_shape_or_val((num_samples, output_dim)) _, wi = parse_shape_or_val((input_dim, output_dim)) _, wh = parse_shape_or_val((output_dim, output_dim)) mask = np.random.randint(2, size=(num_samples, timesteps)) x_k = K.variable(x) h0_k = [K.variable(h0)] wi_k = K.variable(wi) wh_k = K.variable(wh) mask_k = K.variable(mask) def rnn_fn(x_k, h_k): assert len(h_k) == 1 y_k = K.dot(x_k, wi_k) + K.dot(h_k[0], wh_k) return y_k, [y_k] # test default setup last_output_list = [] outputs_list = [] state_list = [] kwargs_list = [ {'go_backwards': False, 'mask': None}, {'go_backwards': False, 'mask': None, 'unroll': True, 'input_length': timesteps}, {'go_backwards': True, 'mask': None}, {'go_backwards': True, 'mask': None, 'unroll': True, 'input_length': timesteps}, {'go_backwards': False, 'mask': mask_k}, {'go_backwards': False, 'mask': mask_k, 'unroll': True, 'input_length': timesteps}, ] for (i, kwargs) in enumerate(kwargs_list): last_y1, y1, h1 = KNP.rnn(x, [wi, wh, None], h0, **kwargs) last_y2, y2, h2 = K.rnn(rnn_fn, x_k, h0_k, **kwargs) assert len(h2) == 1 last_y2 = K.eval(last_y2) y2 = K.eval(y2) h1 = h1[:, -1] h2 = K.eval(h2[0]) if kwargs['mask'] is not None: last_y1 = last_y1 * np.expand_dims(mask[:, -1], -1) last_y2 = last_y2 * np.expand_dims(mask[:, -1], -1) y1 = y1 * np.expand_dims(mask, -1) y2 = y2 * np.expand_dims(mask, -1) h1 = h1 * np.expand_dims(mask[:, -1], -1) h2 = h2 * np.expand_dims(mask[:, -1], -1) last_output_list.append(last_y2) outputs_list.append(y2) state_list.append(h2) if i % 2 == 0: assert_allclose(last_y1, last_y2, atol=1e-05) assert_allclose(y1, y2, atol=1e-05) 
assert_allclose(h1, h2, atol=1e-05) else: assert_allclose(last_output_list[i - 1], last_output_list[i], atol=1e-05) assert_allclose(outputs_list[i - 1], outputs_list[i], atol=1e-05) assert_allclose(state_list[i - 1], state_list[i], atol=1e-05) def test_rnn_additional_states(self): # implement a simple RNN with an additional state # whose shape is different from that of the output num_samples = 4 input_dim = 5 output_dim = 3 timesteps = 6 _, x = parse_shape_or_val((num_samples, timesteps, input_dim)) _, h0 = parse_shape_or_val((num_samples, output_dim)) _, wi = parse_shape_or_val((input_dim, output_dim)) _, wh = parse_shape_or_val((output_dim, output_dim)) mask = np.random.randint(2, size=(num_samples, timesteps)) x_k = K.variable(x) h0_k = [K.variable(h0), K.variable(np.concatenate([h0, h0], axis=-1))] wi_k = K.variable(wi) wh_k = K.variable(wh) mask_k = K.variable(mask) def rnn_fn(x_k, h_k): assert len(h_k) == 2 y_k = K.dot(x_k, wi_k) + K.dot(h_k[0], wh_k) return y_k, [y_k, K.concatenate([y_k, y_k], axis=-1)] # test default setup last_output_list = [] outputs_list = [] state_list = [] kwargs_list = [ {'go_backwards': False, 'mask': None}, {'go_backwards': False, 'mask': None, 'unroll': True, 'input_length': timesteps}, {'go_backwards': True, 'mask': None}, {'go_backwards': True, 'mask': None, 'unroll': True, 'input_length': timesteps}, {'go_backwards': False, 'mask': mask_k}, {'go_backwards': False, 'mask': mask_k, 'unroll': True, 'input_length': timesteps}, ] for (i, kwargs) in enumerate(kwargs_list): last_y1, y1, h1 = KNP.rnn(x, [wi, wh, None], h0, **kwargs) last_y2, y2, h2 = K.rnn(rnn_fn, x_k, h0_k, **kwargs) assert len(h2) == 2 last_y2 = K.eval(last_y2) y2 = K.eval(y2) h11 = h1[:, -1] h12 = np.concatenate([h1[:, -1], h1[:, -1]], axis=-1) h21 = K.eval(h2[0]) h22 = K.eval(h2[1]) if kwargs['mask'] is not None: last_y1 = last_y1 * np.expand_dims(mask[:, -1], -1) last_y2 = last_y2 * np.expand_dims(mask[:, -1], -1) y1 = y1 * np.expand_dims(mask, -1) y2 = y2 * 
np.expand_dims(mask, -1) h11 = h11 * np.expand_dims(mask[:, -1], -1) h21 = h21 * np.expand_dims(mask[:, -1], -1) h12 = h12 * np.expand_dims(mask[:, -1], -1) h22 = h22 * np.expand_dims(mask[:, -1], -1) last_output_list.append(last_y2) outputs_list.append(y2) state_list.append((h21, h22)) if i % 2 == 0: assert_allclose(last_y1, last_y2, atol=1e-05) assert_allclose(y1, y2, atol=1e-05) assert_allclose(h11, h21, atol=1e-05) assert_allclose(h12, h22, atol=1e-05) else: assert_allclose(last_output_list[i - 1], last_output_list[i], atol=1e-05) assert_allclose(outputs_list[i - 1], outputs_list[i], atol=1e-05) assert_allclose(state_list[i - 1][0], state_list[i][0], atol=1e-05) assert_allclose(state_list[i - 1][1], state_list[i][1], atol=1e-05) def test_rnn_no_states(self): # implement a simple RNN without states input_dim = 8 output_dim = 4 timesteps = 5 _, x = parse_shape_or_val((32, timesteps, input_dim)) _, wi = parse_shape_or_val((input_dim, output_dim)) x_k = K.variable(x) wi_k = K.variable(wi) def rnn_fn(x_k, h_k): assert len(h_k) == 0 y_k = K.dot(x_k, wi_k) return y_k, [] last_y1, y1, h1 = KNP.rnn(x, [wi, None, None], None, go_backwards=False, mask=None) last_y2, y2, h2 = K.rnn(rnn_fn, x_k, [], go_backwards=False, mask=None) assert len(h2) == 0 last_y2 = K.eval(last_y2) y2 = K.eval(y2) assert_allclose(last_y1, last_y2, atol=1e-05) assert_allclose(y1, y2, atol=1e-05) def legacy_test_rnn(self): # implement a simple RNN num_samples = 4 input_dim = 5 output_dim = 3 timesteps = 6 input_val = np.random.random((num_samples, timesteps, input_dim)).astype(np.float32) init_state_val = np.random.random((num_samples, output_dim)).astype(np.float32) W_i_val = np.random.random((input_dim, output_dim)).astype(np.float32) W_o_val = np.random.random((output_dim, output_dim)).astype(np.float32) np_mask = np.random.randint(2, size=(num_samples, timesteps)) def rnn_step_fn(k): W_i = k.variable(W_i_val) W_o = k.variable(W_o_val) def step_function(x, states): assert len(states) == 1 
prev_output = states[0] output = k.dot(x, W_i) + k.dot(prev_output, W_o) return output, [output] return step_function # test default setup last_output_list = [[], [], [], [], [], []] outputs_list = [[], [], [], [], [], []] state_list = [[], [], [], [], [], []] for k in BACKENDS: rnn_fn = rnn_step_fn(k) inputs = k.variable(input_val) initial_states = [k.variable(init_state_val)] mask = k.variable(np_mask) kwargs_list = [ {'go_backwards': False, 'mask': None}, {'go_backwards': False, 'mask': None, 'unroll': True, 'input_length': timesteps}, {'go_backwards': True, 'mask': None}, {'go_backwards': True, 'mask': None, 'unroll': True, 'input_length': timesteps}, {'go_backwards': False, 'mask': mask}, {'go_backwards': False, 'mask': mask, 'unroll': True, 'input_length': timesteps}, ] for (i, kwargs) in enumerate(kwargs_list): last_output, outputs, new_states = k.rnn(rnn_fn, inputs, initial_states, **kwargs) last_output_list[i].append(k.eval(last_output)) outputs_list[i].append(k.eval(outputs)) assert len(new_states) == 1 state_list[i].append(k.eval(new_states[0])) assert_list_pairwise(last_output_list[0], shape=False, atol=1e-04) assert_list_pairwise(outputs_list[0], shape=False, atol=1e-04) assert_list_pairwise(state_list[0], shape=False, atol=1e-04) assert_list_pairwise(last_output_list[2], shape=False, atol=1e-04) assert_list_pairwise(outputs_list[2], shape=False, atol=1e-04) assert_list_pairwise(state_list[2], shape=False, atol=1e-04) for l, u_l in zip(last_output_list[0], last_output_list[1]): assert_allclose(l, u_l, atol=1e-04) for o, u_o in zip(outputs_list[0], outputs_list[1]): assert_allclose(o, u_o, atol=1e-04) for s, u_s in zip(state_list[0], state_list[1]): assert_allclose(s, u_s, atol=1e-04) for b_l, b_u_l in zip(last_output_list[2], last_output_list[3]): assert_allclose(b_l, b_u_l, atol=1e-04) for b_o, b_u_o in zip(outputs_list[2], outputs_list[3]): assert_allclose(b_o, b_u_o, atol=1e-04) for b_s, b_u_s in zip(state_list[2], state_list[3]): 
def legacy_test_rnn_no_states(self):
    """Run a state-free step function through `rnn` on every backend in
    `BACKENDS` and check that the evaluated last outputs and output
    sequences agree pairwise.
    """
    timesteps, input_dim, output_dim = 5, 8, 4

    input_val = np.random.random((32, timesteps, input_dim))
    kernel_val = np.random.random((input_dim, output_dim))

    def make_step(backend):
        kernel = backend.variable(kernel_val)

        def step(x, states):
            # No state is passed in and none is returned.
            assert len(states) == 0
            return backend.dot(x, kernel), []

        return step

    finals = []
    sequences = []
    for backend in BACKENDS:
        step = make_step(backend)
        last_output, outputs, new_states = backend.rnn(
            step, backend.variable(input_val), [],
            go_backwards=False, mask=None)
        finals.append(backend.eval(last_output))
        sequences.append(backend.eval(outputs))
        assert len(new_states) == 0

    assert_list_pairwise(finals, shape=False)
    assert_list_pairwise(sequences, shape=False)
def test_dropout(self):
    """Check `dropout` across `BACKENDS`, with and without `noise_shape`.

    Dropout masks differ between backends, so the evaluated outputs are
    compared pairwise with `allclose=False` and only their means are required
    to be close. A negative `level` must raise `ValueError`.
    """
    val = np.random.random((100, 100))

    # Same checks for the default noise shape and for an explicit one.
    for extra_kwargs in ({}, {'noise_shape': list(val.shape)}):
        z_list = [k.eval(k.dropout(k.variable(val), level=0.2, **extra_kwargs))
                  for k in BACKENDS]
        assert_list_pairwise(z_list, allclose=False)
        # dropout patterns are different, only check mean
        for z_a, z_b in zip(z_list, z_list[1:]):
            assert np.abs(z_a.mean() - z_b.mean()) < 0.05

    # Test invalid use cases
    with pytest.raises(ValueError):
        K.dropout(K.variable(val), level=-0.5)
def test_nn_operations(self):
    """Run the activation, cross-entropy and `l2_normalize` ops through the
    shared `check_*_tensor_operation` helpers against `WITH_NP`.
    """
    activation_cases = [
        ('softplus', (4, 10), {}),
        ('elu', (4, 10), {'alpha': 0.5}),
        ('sigmoid', (4, 2), {}),
        ('hard_sigmoid', (4, 2), {}),
        ('tanh', (4, 2), {}),
        ('softmax', (4, 10), {}),
        ('softmax', (4, 5, 3), {'axis': 1}),
        ('softmax', (4, 5, 3, 10), {'axis': 2}),
    ]
    for op_name, shape, op_kwargs in activation_cases:
        check_single_tensor_operation(op_name, shape, WITH_NP, **op_kwargs)

    check_two_tensor_operation('binary_crossentropy', (4, 2), (4, 2),
                               WITH_NP, from_logits=True)
    # cross_entropy call require the label is a valid probability distribution,
    # otherwise it is garbage in garbage out...
    # due to the algo difference, we can't guarantee CNTK has the same result
    # on the garbage input.
    # so create a separate test case for valid label input
    if K.backend() != 'cntk':
        check_two_tensor_operation('categorical_crossentropy', (4, 2), (4, 2),
                                   WITH_NP, from_logits=True)

    xval = np.asarray([[0.26157712, 0.0432167],
                       [-0.43380741, 0.30559841],
                       [0.20225059, -0.38956559],
                       [-0.13805378, 0.08506755]], dtype=np.float32)
    yval = np.asarray([[0.46221867, 0.53778133],
                       [0.51228984, 0.48771016],
                       [0.64916514, 0.35083486],
                       [0.47028078, 0.52971922]], dtype=np.float32)
    check_two_tensor_operation('categorical_crossentropy', yval, xval, WITH_NP,
                               cntk_two_dynamicity=True, from_logits=True)

    for loss_name in ('binary_crossentropy', 'categorical_crossentropy'):
        check_two_tensor_operation(loss_name, (4, 2), (4, 2),
                                   WITH_NP, from_logits=False)

    for axis in (-1, 1):
        check_single_tensor_operation('l2_normalize', (4, 3), WITH_NP,
                                      axis=axis)
@pytest.mark.parametrize(
    'op,input_shape,kernel_shape,padding,data_format',
    [
        ('conv1d', (2, 8, 2), (3, 2, 3), 'same', 'channels_last'),
        ('conv1d', (1, 8, 2), (3, 2, 3), 'valid', 'channels_last'),
        ('conv1d', (1, 2, 8), (3, 2, 3), 'valid', 'channels_first'),
        ('conv2d', (2, 3, 4, 5), (3, 3, 3, 2), 'same', 'channels_first'),
        ('conv2d', (2, 3, 5, 6), (4, 3, 3, 4), 'valid', 'channels_first'),
        ('conv2d', (1, 6, 5, 3), (3, 4, 3, 2), 'valid', 'channels_last'),
        ('conv2d', (1, 7, 6, 3), (3, 3, 3, 4), 'same', 'channels_last'),
        ('conv3d', (2, 3, 4, 5, 4), (3, 3, 3, 3, 4), 'same', 'channels_first'),
        ('conv3d', (2, 3, 5, 4, 6), (3, 2, 4, 3, 4), 'valid', 'channels_first'),
        ('conv3d', (1, 2, 2, 2, 1), (2, 2, 2, 1, 1), 'valid', 'channels_last'),
        ('conv3d', (1, 3, 5, 4, 2), (3, 3, 3, 2, 3), 'same', 'channels_last'),
    ])
def test_conv(self, op, input_shape, kernel_shape, padding, data_format):
    """Run one conv1d/conv2d/conv3d case through
    `check_two_tensor_operation` against `WITH_NP`.
    """
    conv_kwargs = {
        'padding': padding,
        'data_format': data_format,
        'cntk_dynamicity': True,
    }
    check_two_tensor_operation(op, input_shape, kernel_shape, WITH_NP,
                               **conv_kwargs)
@pytest.mark.skipif((K.backend() == 'cntk' and K.dev.type() == 0),
                    reason='cntk only supports dilated conv on GPU')
@pytest.mark.parametrize(
    'op,input_shape,kernel_shape,padding,data_format,dilation_rate',
    [
        ('conv1d', (2, 8, 3), (4, 3, 2), 'valid', 'channels_last', 2),
        ('conv1d', (2, 3, 8), (4, 3, 2), 'valid', 'channels_first', 2),
        ('conv2d', (2, 8, 9, 3), (3, 3, 3, 2), 'same', 'channels_last', (2, 2)),
        ('conv2d', (2, 3, 9, 8), (4, 3, 3, 4), 'valid', 'channels_first', (2, 2)),
        ('conv3d', (2, 5, 4, 6, 3), (2, 2, 3, 3, 4), 'valid', 'channels_last',
         (2, 2, 2)),
        ('conv3d', (2, 3, 5, 4, 6), (2, 2, 3, 3, 4), 'same', 'channels_first',
         (2, 2, 2)),
    ])
def test_dilated_conv(self, op, input_shape, kernel_shape, padding,
                      data_format, dilation_rate):
    """Run one dilated convolution case through
    `check_two_tensor_operation` against `WITH_NP`.
    """
    conv_kwargs = {
        'padding': padding,
        'data_format': data_format,
        'dilation_rate': dilation_rate,
        'cntk_dynamicity': True,
    }
    check_two_tensor_operation(op, input_shape, kernel_shape, WITH_NP,
                               **conv_kwargs)
@pytest.mark.parametrize(
    'op,input_shape,kernel_shape,padding,data_format',
    [
        ('depthwise_conv2d', (2, 3, 4, 5), (3, 3, 3, 2), 'same', 'channels_first'),
        ('depthwise_conv2d', (2, 3, 5, 6), (4, 3, 3, 4), 'valid', 'channels_first'),
        ('depthwise_conv2d', (1, 6, 5, 3), (3, 4, 3, 2), 'valid', 'channels_last'),
        ('depthwise_conv2d', (1, 7, 6, 3), (3, 3, 3, 4), 'same', 'channels_last'),
    ])
def test_depthwise_conv(self, op, input_shape, kernel_shape, padding,
                        data_format):
    """Run one `depthwise_conv2d` case through
    `check_two_tensor_operation` against `WITH_NP`.
    """
    conv_kwargs = {
        'padding': padding,
        'data_format': data_format,
        'cntk_dynamicity': True,
    }
    check_two_tensor_operation(op, input_shape, kernel_shape, WITH_NP,
                               **conv_kwargs)
def legacy_test_conv3d(self):
    """Check `conv3d` across `BACKENDS` for three shape/data-format cases."""
    # TH input shape: (samples, input_depth, conv_dim1, conv_dim2, conv_dim3)
    # TF input shape: (samples, conv_dim1, conv_dim2, conv_dim3, input_depth)
    # TH kernel shape: (depth, input_depth, x, y, z)
    # TF kernel shape: (x, y, z, input_depth, depth)
    cases = (
        ((2, 3, 4, 5, 4), (2, 2, 2, 3, 4), 'channels_first'),
        ((2, 3, 5, 4, 6), (3, 2, 4, 3, 4), 'channels_first'),
        ((1, 2, 2, 2, 1), (2, 2, 2, 1, 1), 'channels_last'),
    )
    for case in cases:
        input_shape, kernel_shape, data_format = case
        check_two_tensor_operation(
            'conv3d', input_shape, kernel_shape, BACKENDS,
            cntk_dynamicity=True, data_format=data_format)
def legacy_test_pool2d(self):
    """Check `pool2d` across `BACKENDS` for several input shapes, pool sizes,
    paddings and pool modes; every case uses strides of (1, 1).
    """
    cases = [
        ((5, 10, 12, 3), (2, 2), {'padding': 'valid'}),
        ((5, 9, 11, 3), (2, 2), {'padding': 'valid'}),
        ((5, 9, 11, 3), (2, 2), {'pool_mode': 'avg'}),
        ((5, 9, 11, 3), (2, 3), {'padding': 'valid'}),
        ((2, 7, 7, 5), (3, 3), {'padding': 'same', 'pool_mode': 'avg'}),
    ]
    for input_shape, pool_size, pool_kwargs in cases:
        check_single_tensor_operation(
            'pool2d', input_shape, BACKENDS, cntk_dynamicity=True,
            pool_size=pool_size, strides=(1, 1), **pool_kwargs)
def test_random_binomial(self):
    """Check `K.random_binomial` with p=0.5: the sample mean is within 0.015
    of p and the extreme values are exactly 0 and 1, both for a single large
    draw and for one small tensor evaluated 200 times.
    """
    prob = 0.5

    def check_samples(values):
        assert np.abs(np.mean(values) - prob) < 0.015
        assert np.max(values) == 1
        assert np.min(values) == 0

    single_draw = K.eval(K.random_binomial((200, 100), prob))
    assert single_draw.shape == (200, 100)
    check_samples(single_draw)

    tensor = K.random_binomial((10, 10), prob)
    repeated = np.array([K.eval(tensor) for _ in range(200)])
    check_samples(repeated)
def test_pooling_invalid_use(self):
    """Check that `K.pool2d` and `K.pool3d` raise `ValueError` for an unknown
    `data_format`, `padding` or `pool_mode`.
    """
    input_shapes = [(5, 10, 12, 3), (5, 10, 12, 6, 3)]
    pool_sizes = [(2, 2), (2, 2, 2)]
    invalid_kwargs = (
        {'data_format': 'channels_middle'},
        {'padding': 'twice'},
        {'pool_mode': 'median'},
    )

    for input_shape, pool_size in zip(input_shapes, pool_sizes):
        x = K.variable(np.random.random(input_shape))
        # A two-element pool size selects the 2D op, anything else the 3D op.
        pool_fn = K.pool2d if len(pool_size) == 2 else K.pool3d
        for bad_kwargs in invalid_kwargs:
            with pytest.raises(ValueError):
                pool_fn(x, pool_size=pool_size, **bad_kwargs)
def test_temporal_padding(self):
    """Check `temporal_padding` across `BACKENDS`, once with the default
    padding and once with an explicit `padding=(1, 2)`.
    """
    cases = (
        ((4, 3, 3), {}),
        ((2, 3, 4), {'padding': (1, 2)}),
    )
    for input_shape, pad_kwargs in cases:
        check_single_tensor_operation('temporal_padding', input_shape,
                                      BACKENDS, **pad_kwargs)
def test_batchnorm(self):
    """Check that `normalize_batch_in_training` yields outputs of the same
    shape on the Theano, TensorFlow and CNTK backends for both data formats.
    Only shapes are compared, not values.
    """
    spatial = (2, 3)
    for data_format in ('channels_first', 'channels_last'):
        channels_first = data_format == 'channels_first'
        x_shape = (1, 4) + spatial if channels_first else (1,) + spatial + (4,)
        x_val = np.random.random(x_shape).astype(np.float32)

        th_in = KTH.variable(x_val)
        tf_in = KTF.variable(x_val)
        cntk_in = KC.placeholder(x_shape)

        th_out, _, _ = KTH.normalize_batch_in_training(
            th_in, None, None, reduction_axes='per-activation')
        tf_out, _, _ = KTF.normalize_batch_in_training(
            tf_in, None, None, reduction_axes=[0, 1, 2, 3])
        cntk_out, _, _ = KC.normalize_batch_in_training(
            cntk_in, None, None, reduction_axes=[0, 1, 2, 3])

        th_val = KTH.eval(th_out)
        tf_val = KTF.eval(tf_out)
        # The CNTK input is a placeholder, so it is fed through a function.
        cntk_val = KC.function([cntk_in], [cntk_out])([x_val])[0]

        assert th_val.shape == tf_val.shape
        assert th_val.shape == cntk_val.shape
The Theano code subtracts out the max # before the final log, so the results are different but scale # identically and still train properly @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') def test_ctc(self): if K.backend() == 'theano': ref = [1.73308, 3.81351] else: ref = [3.34211, 5.42262] # simplified version of TensorFlow's test label_lens = np.expand_dims(np.asarray([5, 4]), 1) input_lens = np.expand_dims(np.asarray([5, 5]), 1) # number of timesteps # dimensions are batch x time x categories labels = np.asarray([[0, 1, 2, 1, 0], [0, 1, 1, 0, -1]]) inputs = np.asarray( [[[0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553], [0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436], [0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688], [0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533], [0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107]], [[0.30176, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508], [0.24082, 0.397533, 0.0557226, 0.0546814, 0.0557528, 0.19549], [0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, 0.202456], [0.280884, 0.429522, 0.0326593, 0.0339046, 0.0326856, 0.190345], [0.423286, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046]]], dtype=np.float32) k_labels = K.variable(labels, dtype="int32") k_inputs = K.variable(inputs, dtype="float32") k_input_lens = K.variable(input_lens, dtype="int32") k_label_lens = K.variable(label_lens, dtype="int32") res = K.eval(K.ctc_batch_cost(k_labels, k_inputs, k_input_lens, k_label_lens)) assert_allclose(res[0, :] if K.backend() == 'theano' else res[:, 0], ref, atol=1e-05) # test when batch_size = 1, that is, one sample only # get only first sample from above test case if K.backend() == 'theano': ref = [1.73308] else: ref = [3.34211] input_lens = np.expand_dims(np.asarray([5]), 1) label_lens = np.expand_dims(np.asarray([5]), 1) labels = np.asarray([[0, 1, 2, 1, 0]]) inputs = np.asarray( [[[0.633766, 0.221185, 0.0917319, 
@pytest.mark.skipif(K.backend() != 'tensorflow',
                    reason='Test adapted from tensorflow.')
def test_ctc_decode_greedy(self):
    """Test two batch entries - best path decoder.

    Builds a (batch, time, depth) probability tensor from two per-sequence
    matrices, decodes it with `K.ctc_decode(..., greedy=True)` and compares
    the decoded labels and the log probabilities with hand-computed values.
    """
    max_time_steps = 6

    seq_len_0 = 4
    input_prob_matrix_0 = np.asarray(
        [[1.0, 0.0, 0.0, 0.0],  # t=0
         [0.0, 0.0, 0.4, 0.6],  # t=1
         [0.0, 0.0, 0.4, 0.6],  # t=2
         [0.0, 0.9, 0.1, 0.0],  # t=3
         [0.0, 0.0, 0.0, 0.0],  # t=4 (ignored)
         [0.0, 0.0, 0.0, 0.0]],  # t=5 (ignored)
        dtype=np.float32)

    seq_len_1 = 5
    # dimensions are time x depth
    input_prob_matrix_1 = np.asarray(
        [[0.1, 0.9, 0.0, 0.0],  # t=0
         [0.0, 0.9, 0.1, 0.0],  # t=1
         [0.0, 0.0, 0.1, 0.9],  # t=2
         [0.0, 0.9, 0.1, 0.1],  # t=3
         [0.9, 0.1, 0.0, 0.0],  # t=4
         [0.0, 0.0, 0.0, 0.0]],  # t=5 (ignored)
        dtype=np.float32)

    # len max_time_steps array of batch_size x depth matrices
    inputs = [np.vstack([input_prob_matrix_0[t, :], input_prob_matrix_1[t, :]])
              for t in range(max_time_steps)]

    # change tensorflow order to keras backend order
    inputs = K.variable(np.asarray(inputs).transpose((1, 0, 2)))
    # batch_size length vector of sequence_lengths
    input_length = K.variable(np.array([seq_len_0, seq_len_1], dtype=np.int32))

    # batch_size length vector of negative log probabilities
    log_prob_truth = np.array([
        np.sum(-np.log([1.0, 0.6, 0.6, 0.9])),
        np.sum(-np.log([0.9, 0.9, 0.9, 0.9, 0.9]))
    ], np.float32)[:, np.newaxis]

    # keras output, unlike tensorflow, is a dense (not sparse) tensor
    decode_truth = np.array([[0, 1, -1], [1, 1, 0]])

    decode_pred_tf, log_prob_pred_tf = K.ctc_decode(inputs, input_length,
                                                    greedy=True)

    assert len(decode_pred_tf) == 1

    decode_pred = K.eval(decode_pred_tf[0])
    log_prob_pred = K.eval(log_prob_pred_tf)

    # array_equal also fails on a shape mismatch instead of broadcasting.
    assert np.array_equal(decode_truth, decode_pred)
    assert np.allclose(log_prob_truth, log_prob_pred)
top_paths log_prob_pred = K.eval(log_prob_pred_tf) for i in range(top_paths): assert np.alltrue(decode_truth[i] == K.eval(decode_pred_tf[i])) assert np.allclose(log_prob_truth, log_prob_pred) def test_one_hot(self): input_length = 10 num_classes = 20 batch_size = 30 indices = np.random.randint(0, num_classes, size=(batch_size, input_length)) oh = np.eye(num_classes)[indices] koh = K.eval(K.one_hot(K.variable(indices, dtype='int32'), num_classes)) assert np.all(koh == oh) @pytest.mark.skipif(K.backend() == 'cntk', reason='Sparse tensors are not supported in cntk.') def test_sparse_dot(self): x_d = np.array([0, 7, 2, 3], dtype=np.float32) x_r = np.array([0, 2, 2, 3], dtype=np.int64) x_c = np.array([4, 3, 2, 3], dtype=np.int64) x_sparse = sparse.csr_matrix((x_d, (x_r, x_c)), shape=(4, 5)) x_dense = x_sparse.toarray() W = np.random.random((5, 4)) # cntk not support it yet backends = [KTF] if KTH.th_sparse_module: # Theano has some dependency issues for sparse backends.append(KTH) for k in backends: t_W = k.variable(W) k_s = k.eval(k.dot(k.variable(x_sparse), t_W)) k_d = k.eval(k.dot(k.variable(x_dense), t_W)) assert k_s.shape == k_d.shape assert_allclose(k_s, k_d, atol=1e-05) def test_sparse_concat(self): x_d = np.array([0, 7, 2, 3], dtype=np.float32) x_r = np.array([0, 2, 2, 3], dtype=np.int64) x_c = np.array([4, 3, 2, 3], dtype=np.int64) x_sparse_1 = sparse.csr_matrix((x_d, (x_r, x_c)), shape=(4, 5)) x_d = np.array([0, 7, 2, 3], dtype=np.float32) x_r = np.array([0, 2, 2, 3], dtype=np.int64) x_c = np.array([4, 3, 2, 3], dtype=np.int64) x_sparse_2 = sparse.csr_matrix((x_d, (x_r, x_c)), shape=(4, 5)) x_dense_1 = x_sparse_1.toarray() x_dense_2 = x_sparse_2.toarray() # cntk not support it yet backends = [KTF] if KTH.th_sparse_module: # Theano has some dependency issues for sparse backends.append(KTH) for k in backends: k_s = k.concatenate([k.variable(x_sparse_1), k.variable(x_sparse_2)]) assert k.is_sparse(k_s) k_s_d = k.eval(k_s) k_d = 
k.eval(k.concatenate([k.variable(x_dense_1), k.variable(x_dense_2)])) assert k_s_d.shape == k_d.shape assert_allclose(k_s_d, k_d, atol=1e-05) @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') def test_map(self): x = np.random.rand(10, 3).astype(np.float32) vx = K.variable(x) kx = K.eval(K.map_fn(K.sum, vx)) # make sure we can also walk the indexes in tensorflow which we # can't without specifying dtype kx2 = K.eval(K.map_fn( lambda i: K.sum(vx[i]), K.arange(10), dtype=K.floatx() )) assert (10,) == kx.shape assert (10,) == kx2.shape assert_allclose(x.sum(axis=1), kx, atol=1e-05) assert_allclose(kx, kx2, atol=1e-05) @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') def test_foldl(self): x = np.random.rand(10, 3).astype(np.float32) kx = K.eval(K.foldl(lambda a, b: a + b, K.variable(x))) assert (3,) == kx.shape assert_allclose(x.sum(axis=0), kx, atol=1e-05) @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') def test_foldr(self): # This test aims to make sure that we walk the array from right to left # and checks it in the following way: multiplying left to right 1e-40 # cannot be held into a float32 so it causes an underflow while from # right to left we have no such problem and the result is larger x = np.array([1e-20, 1e-20, 10, 10, 10], dtype=np.float32) vx = K.variable(x) p1 = K.eval(K.foldl(lambda a, b: a * b, vx)) p2 = K.eval(K.foldr(lambda a, b: a * b, vx)) assert p1 < p2 assert 9e-38 < p2 <= 1e-37 @pytest.mark.skipif(K.backend() == 'cntk', reason='cntk has issues with negative number.') def test_arange(self): for test_value in (-20, 0, 1, 10): a_list = [] dtype_list = [] for k in WITH_NP: t = k.arange(test_value) a = k.eval(t) assert np.array_equal(a, np.arange(test_value)) dtype_list.append(k.dtype(t)) a_list.append(a) for i in range(len(a_list) - 1): assert np.array_equal(a_list[i], a_list[i + 1]) for start, stop, step in ((0, 5, 1), (-5, 5, 2), (0, 1, 2)): a_list = [] for k in WITH_NP: a = 
k.eval(k.arange(start, stop, step)) assert np.array_equal(a, np.arange(start, stop, step)) a_list.append(a) for i in range(len(a_list) - 1): assert np.array_equal(a_list[i], a_list[i + 1]) for dtype in ('int32', 'int64', 'float32', 'float64'): for k in WITH_NP: t = k.arange(10, dtype=dtype) assert k.dtype(t) == dtype for k in WITH_NP: start = k.constant(1, dtype='int32') t = k.arange(start) assert len(k.eval(t)) == 1 start = k.constant(-1, dtype='int32') t = k.arange(start) assert len(k.eval(t)) == 0 @pytest.mark.parametrize('training', [True, False]) def test_in_train_phase(self, training): check_two_tensor_operation('in_train_phase', (3, 3), (2, 2), WITH_NP, training=training) check_two_tensor_operation('in_train_phase', (2, 3), (2, 3), WITH_NP, training=training) @pytest.mark.parametrize('training', [True, False]) def test_in_test_phase(self, training): check_two_tensor_operation('in_test_phase', (3, 3), (2, 2), WITH_NP, training=training) check_two_tensor_operation('in_test_phase', (2, 3), (2, 3), WITH_NP, training=training) def test_setfloatx_incorrect_values(self): # Keep track of the old value old_floatx = floatx() # Try some incorrect values initial = floatx() for value in ['', 'beerfloat', 123]: with pytest.raises(ValueError): set_floatx(value) assert floatx() == initial # Restore old value set_floatx(old_floatx) def test_setfloatx_correct_values(self): # Keep track of the old value old_floatx = floatx() # Check correct values for value in ['float16', 'float32', 'float64']: set_floatx(value) assert floatx() == value # Restore old value set_floatx(old_floatx) @pytest.mark.skipif((K.backend() == 'cntk'), reason='cntk does not support float16') def test_set_floatx(self): """ Make sure that changes to the global floatx are effectively taken into account by the backend. 
""" # Keep track of the old value old_floatx = floatx() set_floatx('float16') var = variable([10]) check_dtype(var, 'float16') set_floatx('float64') var = variable([10]) check_dtype(var, 'float64') # Restore old value set_floatx(old_floatx) def test_dtype(self): assert K.dtype(K.variable(1, dtype='float64')) == 'float64' assert K.dtype(K.variable(1, dtype='float32')) == 'float32' assert K.dtype(K.variable(1, dtype='float16')) == 'float16' def test_variable_support_bool_dtype(self): # Github issue: 7819 if K.backend() == 'tensorflow': assert K.dtype(K.variable(1, dtype='int16')) == 'int16' assert K.dtype(K.variable(False, dtype='bool')) == 'bool' with pytest.raises(TypeError): K.variable('', dtype='unsupported') if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/backend/reference_operations.py0000644000000000116100000003443713355226611022751 0ustar rooteng00000000000000"""Utilities for backend functionality checks.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import scipy.signal as signal import scipy as sp from keras.backend import floatx def normalize_conv(func): def wrapper(*args, **kwargs): x = args[0] w = args[1] if x.ndim == 3: w = np.flipud(w) w = np.transpose(w, (1, 2, 0)) if kwargs['data_format'] == 'channels_last': x = np.transpose(x, (0, 2, 1)) elif x.ndim == 4: w = np.fliplr(np.flipud(w)) w = np.transpose(w, (2, 3, 0, 1)) if kwargs['data_format'] == 'channels_last': x = np.transpose(x, (0, 3, 1, 2)) else: w = np.flip(np.fliplr(np.flipud(w)), axis=2) w = np.transpose(w, (3, 4, 0, 1, 2)) if kwargs['data_format'] == 'channels_last': x = np.transpose(x, (0, 4, 1, 2, 3)) dilation_rate = kwargs.pop('dilation_rate', 1) if isinstance(dilation_rate, int): dilation_rate = (dilation_rate,) * (x.ndim - 2) for (i, d) in enumerate(dilation_rate): if d > 1: for j in range(w.shape[2 + i] - 1): w = np.insert(w, 2 * j + 1, 0, axis=2 + i) y = func(x, w, **kwargs) 
def _dilate_kernel(w, axis, rate):
    """Insert `rate - 1` zeros between consecutive kernel taps along `axis`.

    # Arguments
        w: kernel array.
        axis: integer, axis of `w` to dilate.
        rate: dilation rate; values <= 1 leave the kernel untouched.

    # Returns
        An array of the same dtype whose `axis` has length
        `(n - 1) * rate + 1`, with the original taps at stride `rate`.
    """
    rate = int(rate)
    size = w.shape[axis]
    if rate <= 1 or size <= 1:
        return w
    shape = list(w.shape)
    shape[axis] = (size - 1) * rate + 1
    dilated = np.zeros(shape, dtype=w.dtype)
    index = [slice(None)] * w.ndim
    index[axis] = slice(None, None, rate)
    dilated[tuple(index)] = w
    return dilated


def normalize_conv(func):
    """Decorator adapting Keras-style conv arguments for the NumPy kernels.

    The wrapped function receives a channels-first input and a flipped
    kernel laid out as `(dim0, dim1, *spatial)`, where `dim0`/`dim1` are
    the last two axes of the original kernel. The optional `dilation_rate`
    keyword is consumed here (the kernel is zero-dilated) and is not
    forwarded. `data_format` must be passed as a keyword argument.

    # Arguments
        func: callable `func(x, w, **kwargs)` returning a channels-first
            array.

    # Returns
        The wrapping callable; its result is transposed back to
        channels-last when `data_format == 'channels_last'`.
    """
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        x = args[0]
        w = args[1]
        channels_last = kwargs['data_format'] == 'channels_last'

        # Flip every spatial axis of the kernel and move the spatial axes
        # to the end; bring the input to channels-first.
        if x.ndim == 3:
            w = np.flipud(w)
            w = np.transpose(w, (1, 2, 0))
            if channels_last:
                x = np.transpose(x, (0, 2, 1))
        elif x.ndim == 4:
            w = np.fliplr(np.flipud(w))
            w = np.transpose(w, (2, 3, 0, 1))
            if channels_last:
                x = np.transpose(x, (0, 3, 1, 2))
        else:
            w = np.flip(np.fliplr(np.flipud(w)), axis=2)
            w = np.transpose(w, (3, 4, 0, 1, 2))
            if channels_last:
                x = np.transpose(x, (0, 4, 1, 2, 3))

        dilation_rate = kwargs.pop('dilation_rate', 1)
        if isinstance(dilation_rate, int):
            dilation_rate = (dilation_rate,) * (x.ndim - 2)
        # The previous implementation always inserted a single zero between
        # taps, which is only correct for a rate of 2.
        for (i, d) in enumerate(dilation_rate):
            if d > 1:
                w = _dilate_kernel(w, 2 + i, d)

        y = func(x, w, **kwargs)

        if channels_last:
            if y.ndim == 3:
                y = np.transpose(y, (0, 2, 1))
            elif y.ndim == 4:
                y = np.transpose(y, (0, 2, 3, 1))
            else:
                y = np.transpose(y, (0, 2, 3, 4, 1))

        return y

    return wrapper


@normalize_conv
def conv(x, w, padding, data_format):
    """Reference convolution, summing over the first kernel axis.

    # Arguments
        x: channels-first input (after `normalize_conv`).
        w: kernel indexed as `w[k, j]`; axis 0 is summed over, axis 1
            enumerates the outputs.
        padding: forwarded to `scipy.signal.convolve` as `mode`.
        data_format: handled by `normalize_conv`; unused here.

    # Returns
        Array with one entry per sample and per index of `w`'s axis 1.
    """
    return np.array([
        [np.sum(np.stack([signal.convolve(sample[k], w[k, j], mode=padding)
                          for k in range(w.shape[0])], axis=-1), axis=-1)
         for j in range(w.shape[1])]
        for sample in x])


@normalize_conv
def depthwise_conv(x, w, padding, data_format):
    """Reference depthwise convolution.

    Each channel `j` of the input is convolved separately with every
    kernel `w[j, k]`; the results are concatenated along the channel axis.

    # Arguments
        x: channels-first input (after `normalize_conv`).
        w: kernel indexed as `w[j, k]`.
        padding: forwarded to `scipy.signal.convolve` as `mode`.
        data_format: handled by `normalize_conv`; unused here.

    # Returns
        Array with `w.shape[0] * w.shape[1]` channels per sample.
    """
    return np.array([
        np.concatenate(
            [np.stack([signal.convolve(sample[j], w[j, k], mode=padding)
                       for k in range(w.shape[1])], axis=0)
             for j in range(w.shape[0])],
            axis=0)
        for sample in x])


def separable_conv(x, w1, w2, padding, data_format):
    """Depthwise convolution with `w1` followed by `conv` with `w2`."""
    x2 = depthwise_conv(x, w1, padding=padding, data_format=data_format)
    return conv(x2, w2, padding=padding, data_format=data_format)


def conv_transpose(x, w, output_shape, padding, data_format, dilation_rate=1):
    """Reference transposed convolution built on top of `conv`.

    The kernel is flipped along its spatial axes, its last two axes are
    swapped, it is zero-dilated, and the result is passed to `conv`.

    # Arguments
        x: input array, rank 4 (2D) or rank 5 (3D).
        w: kernel with the spatial axes first.
        output_shape: accepted for API compatibility; not used.
        padding: forwarded to `conv`.
        data_format: forwarded to `conv`.
        dilation_rate: integer or one integer per spatial dimension.

    # Returns
        The output of `conv` on the transformed kernel.
    """
    if x.ndim == 4:
        w = np.fliplr(np.flipud(w))
        w = np.transpose(w, (0, 1, 3, 2))
    else:
        w = np.flip(np.fliplr(np.flipud(w)), axis=2)
        w = np.transpose(w, (0, 1, 2, 4, 3))

    if isinstance(dilation_rate, int):
        dilation_rate = (dilation_rate,) * (x.ndim - 2)
    for (i, d) in enumerate(dilation_rate):
        if d > 1:
            w = _dilate_kernel(w, i, d)

    return conv(x, w, padding=padding, data_format=data_format)


conv1d = conv
conv2d = conv
conv3d = conv
depthwise_conv2d = depthwise_conv
separable_conv1d = separable_conv
separable_conv2d = separable_conv
conv2d_transpose = conv_transpose
conv3d_transpose = conv_transpose


def pool(x, pool_size, strides, padding, data_format, pool_mode):
    """Reference pooling over 1, 2 or 3 spatial dimensions.

    # Arguments
        x: input array of rank 3, 4 or 5.
        pool_size: sequence with one window size per spatial dimension.
        strides: sequence with one stride per spatial dimension.
        padding: `'same'` pads each spatial dimension with `size // 2`
            entries of `-inf` on both sides; other values add no padding.
        data_format: `'channels_last'` inputs are transposed to
            channels-first for the computation and transposed back.
        pool_mode: `'avg'` or `'max'`; any other value leaves the stacked
            windows unreduced.

    # Returns
        The pooled array, in the same data format as `x`.
    """
    if data_format == 'channels_last':
        if x.ndim == 3:
            x = np.transpose(x, (0, 2, 1))
        elif x.ndim == 4:
            x = np.transpose(x, (0, 3, 1, 2))
        else:
            x = np.transpose(x, (0, 4, 1, 2, 3))

    no_pad = [(0, 0), (0, 0)]
    if padding == 'same':
        same_pad = no_pad + [(s // 2, s // 2) for s in pool_size]
        x = np.pad(x, same_pad, 'constant', constant_values=-np.inf)

    # Indexing trick: one trailing element per spatial axis lets the
    # negative slice stops below reach the last real element.
    x = np.pad(x, no_pad + [(0, 1) for _ in pool_size],
               'constant', constant_values=0)

    # For every position inside the window: (start, negative stop).
    offsets = [list(zip(range(size), range(-size, 0))) for size in pool_size]
    if x.ndim == 3:
        windows = [x[:, :, k:k1:strides[0]] for (k, k1) in offsets[0]]
    elif x.ndim == 4:
        windows = [x[:, :, k:k1:strides[0], l:l1:strides[1]]
                   for (k, k1) in offsets[0]
                   for (l, l1) in offsets[1]]
    else:
        windows = [x[:, :, k:k1:strides[0], l:l1:strides[1], m:m1:strides[2]]
                   for (k, k1) in offsets[0]
                   for (l, l1) in offsets[1]
                   for (m, m1) in offsets[2]]
    y = np.stack(windows, axis=-1)

    if pool_mode == 'avg':
        # Masking drops the -inf padding from the average.
        y = np.mean(np.ma.masked_invalid(y), axis=-1).data
    elif pool_mode == 'max':
        y = np.max(y, axis=-1)

    if data_format == 'channels_last':
        if y.ndim == 3:
            y = np.transpose(y, (0, 2, 1))
        elif y.ndim == 4:
            y = np.transpose(y, (0, 2, 3, 1))
        else:
            y = np.transpose(y, (0, 2, 3, 4, 1))

    return y
def pool(x, pool_size, strides, padding, data_format, pool_mode):
    """Reference pooling over 1, 2 or 3 spatial dimensions.

    # Arguments
        x: input array of rank 3, 4 or 5.
        pool_size: sequence with one window size per spatial dimension.
        strides: sequence with one stride per spatial dimension.
        padding: `'same'` pads each spatial dimension with `size // 2`
            entries of `-inf` on both sides; other values add no padding.
        data_format: `'channels_last'` inputs are transposed to
            channels-first for the computation and transposed back.
        pool_mode: `'avg'` or `'max'`; any other value leaves the stacked
            windows unreduced.

    # Returns
        The pooled array, in the same data format as `x`.
    """
    if data_format == 'channels_last':
        if x.ndim == 3:
            x = np.transpose(x, (0, 2, 1))
        elif x.ndim == 4:
            x = np.transpose(x, (0, 3, 1, 2))
        else:
            x = np.transpose(x, (0, 4, 1, 2, 3))

    no_pad = [(0, 0), (0, 0)]
    if padding == 'same':
        same_pad = no_pad + [(s // 2, s // 2) for s in pool_size]
        x = np.pad(x, same_pad, 'constant', constant_values=-np.inf)

    # Indexing trick: one trailing element per spatial axis lets the
    # negative slice stops below reach the last real element.
    x = np.pad(x, no_pad + [(0, 1) for _ in pool_size],
               'constant', constant_values=0)

    # For every position inside the window: (start, negative stop).
    offsets = [list(zip(range(size), range(-size, 0))) for size in pool_size]
    if x.ndim == 3:
        windows = [x[:, :, k:k1:strides[0]] for (k, k1) in offsets[0]]
    elif x.ndim == 4:
        windows = [x[:, :, k:k1:strides[0], l:l1:strides[1]]
                   for (k, k1) in offsets[0]
                   for (l, l1) in offsets[1]]
    else:
        windows = [x[:, :, k:k1:strides[0], l:l1:strides[1], m:m1:strides[2]]
                   for (k, k1) in offsets[0]
                   for (l, l1) in offsets[1]
                   for (m, m1) in offsets[2]]
    y = np.stack(windows, axis=-1)

    if pool_mode == 'avg':
        # Masking drops the -inf padding from the average.
        y = np.mean(np.ma.masked_invalid(y), axis=-1).data
    elif pool_mode == 'max':
        y = np.max(y, axis=-1)

    if data_format == 'channels_last':
        if y.ndim == 3:
            y = np.transpose(y, (0, 2, 1))
        elif y.ndim == 4:
            y = np.transpose(y, (0, 2, 3, 1))
        else:
            y = np.transpose(y, (0, 2, 3, 4, 1))

    return y


pool2d = pool
pool3d = pool


def bias_add(x, y, data_format):
    """Add the bias `y` to `x`, broadcasting it over the remaining axes.

    # Arguments
        x: input array.
        y: bias array. For `'channels_first'` a bias of rank > 1 has its
            shape reversed before broadcasting.
        data_format: `'channels_first'` appends singleton axes to the
            bias; any other value prepends them.

    # Returns
        `x + y` after the bias has been expanded.
    """
    bias = y
    if data_format == 'channels_first':
        if bias.ndim > 1:
            bias = np.reshape(bias, bias.shape[::-1])
        new_axis = -1
    else:
        new_axis = 0
    # The count is fixed before the loop, exactly as a `range` would be.
    missing = x.ndim - bias.ndim - 1
    for _ in range(missing):
        bias = np.expand_dims(bias, new_axis)
    return x + bias


def rnn(x, w, init, go_backwards=False, mask=None, unroll=False,
        input_length=None):
    """Reference simple recurrent loop over axis 1 of `x`.

    # Arguments
        x: input array; axis 1 is iterated as the time axis.
        w: triple `(w_i, w_h, w_o)` of input, recurrent and output
            kernels; `w_h` and `w_o` may be `None`.
        init: state used in place of the previous state at the first step.
        go_backwards: iterate the time axis in reverse order.
        mask: optional backend tensor, evaluated with `K.eval`; where it
            is 0 the recurrent term is replaced by the previous state and
            the input term is zeroed.
        unroll: accepted for API compatibility; not used.
        input_length: accepted for API compatibility; not used.

    # Returns
        Tuple `(last_output, outputs, states)`, the latter two stacked
        along axis 1.
    """
    w_i, w_h, w_o = w
    states = []
    outputs = []

    num_steps = x.shape[1]
    if go_backwards:
        time_steps = range(num_steps - 1, -1, -1)
    else:
        time_steps = range(num_steps)

    if mask is None:
        np_mask = None
    else:
        # Imported lazily so the NumPy reference does not need a backend
        # unless a mask tensor has to be evaluated.
        from keras import backend as K
        np_mask = K.eval(mask)

    for (i, t) in enumerate(time_steps):
        h_t = np.dot(x[:, t], w_i)

        if w_h is None:
            h_t1 = 0
        else:
            prev = states[i - 1] if i > 0 else init
            h_t1 = np.dot(prev, w_h)
            if np_mask is not None:
                masked = np_mask[:, t] == 0
                h_t1[masked] = prev[masked]

        o_t = h_t + h_t1
        if w_o is not None:
            o_t = np.dot(o_t, w_o)
        outputs.append(o_t)

        if np_mask is not None:
            h_t = h_t * np_mask[:, t].reshape(-1, 1)
        states.append(h_t + h_t1)

    return outputs[-1], np.stack(outputs, axis=1), np.stack(states, axis=1)
def rnn(x, w, init, go_backwards=False, mask=None, unroll=False,
        input_length=None):
    """Reference simple recurrent loop over axis 1 of `x`.

    # Arguments
        x: input array; axis 1 is iterated as the time axis.
        w: triple `(w_i, w_h, w_o)` of input, recurrent and output
            kernels; `w_h` and `w_o` may be `None`.
        init: state used in place of the previous state at the first step.
        go_backwards: iterate the time axis in reverse order.
        mask: optional backend tensor, evaluated with `K.eval`; where it
            is 0 the recurrent term is replaced by the previous state and
            the input term is zeroed.
        unroll: accepted for API compatibility; not used.
        input_length: accepted for API compatibility; not used.

    # Returns
        Tuple `(last_output, outputs, states)`, the latter two stacked
        along axis 1.
    """
    w_i, w_h, w_o = w
    states = []
    outputs = []

    num_steps = x.shape[1]
    if go_backwards:
        time_steps = range(num_steps - 1, -1, -1)
    else:
        time_steps = range(num_steps)

    if mask is None:
        np_mask = None
    else:
        # Imported lazily so the NumPy reference does not need a backend
        # unless a mask tensor has to be evaluated.
        from keras import backend as K
        np_mask = K.eval(mask)

    for (i, t) in enumerate(time_steps):
        h_t = np.dot(x[:, t], w_i)

        if w_h is None:
            h_t1 = 0
        else:
            prev = states[i - 1] if i > 0 else init
            h_t1 = np.dot(prev, w_h)
            if np_mask is not None:
                masked = np_mask[:, t] == 0
                h_t1[masked] = prev[masked]

        o_t = h_t + h_t1
        if w_o is not None:
            o_t = np.dot(o_t, w_o)
        outputs.append(o_t)

        if np_mask is not None:
            h_t = h_t * np_mask[:, t].reshape(-1, 1)
        states.append(h_t + h_t1)

    return outputs[-1], np.stack(outputs, axis=1), np.stack(states, axis=1)


_LEARNING_PHASE = True


def learning_phase():
    """Return the module-level learning-phase flag."""
    return _LEARNING_PHASE


def set_learning_phase(value):
    """Set the module-level learning-phase flag to `value`."""
    global _LEARNING_PHASE
    _LEARNING_PHASE = value


def in_train_phase(x, alt, training=None):
    """Select `x` in the training phase and `alt` otherwise.

    # Arguments
        x: value, or zero-argument callable, used in the training phase.
        alt: value, or zero-argument callable, used otherwise.
        training: `None` defers to `learning_phase()`. A boolean or
            integer equal to 1 (including NumPy booleans and integers)
            selects `x`; anything else selects `alt`.

    # Returns
        The selected value, calling it first when it is callable.
    """
    if training is None:
        training = learning_phase()

    # Compare by value: `training is 1` depended on the interpreter's
    # small-integer cache and rejected `np.True_` / `np.int64(1)`.
    flag_types = (bool, int, np.bool_, np.integer)
    is_training = isinstance(training, flag_types) and bool(training == 1)

    chosen = x if is_training else alt
    if callable(chosen):
        return chosen()
    return chosen


def in_test_phase(x, alt, training=None):
    """Select `x` outside the training phase and `alt` inside it."""
    return in_train_phase(alt, x, training=training)


def relu(x, alpha=0., max_value=None, threshold=0.):
    """Rectified linear unit with optional leak, cap and threshold.

    # Arguments
        x: input array or scalar.
        alpha: slope applied to `x - threshold` below the threshold.
        max_value: optional upper bound; the part at or above the
            threshold is clipped to `[0, max_value]`.
        threshold: values below it go through the `alpha` branch.

    # Returns
        The activated values.
    """
    y = x * (x >= threshold)
    if max_value is not None:
        y = np.clip(y, 0.0, max_value)
    # Out-of-place add: `+=` cannot store the float leak term into an
    # integer array.
    y = y + alpha * (x - threshold) * (x < threshold)
    return y


def switch(condition, then_expression, else_expression):
    """Blend two expressions using `condition` cast to `floatx()`.

    Trailing axes are appended to the condition until its rank matches
    `then_expression`.
    """
    cond_float = condition.astype(floatx())
    while cond_float.ndim < then_expression.ndim:
        cond_float = cond_float[..., None]
    return cond_float * then_expression + (1 - cond_float) * else_expression


def softplus(x):
    """Return `log(1 + exp(x))`.

    Computed as `logaddexp(0, x)` so that large inputs do not overflow
    to `inf`.
    """
    return np.logaddexp(0., x)


def elu(x, alpha=1.):
    """Exponential linear unit: `x` above 0, `alpha * (exp(x) - 1)` below."""
    return x * (x > 0) + alpha * (np.exp(x) - 1.) * (x < 0)


def sigmoid(x):
    """Logistic sigmoid `1 / (1 + exp(-x))`."""
    return 1. / (1. + np.exp(-x))


def hard_sigmoid(x):
    """Piecewise-linear sigmoid: `0.2 * x + 0.5` limited to `[0, 1]`."""
    y = 0.2 * x + 0.5
    y = np.minimum(y, 1.)
    y = np.maximum(y, 0.)
    return y
def hard_sigmoid(x):
    """Piecewise-linear sigmoid: `0.2 * x + 0.5` limited to `[0, 1]`."""
    y = 0.2 * x + 0.5
    y = np.minimum(y, 1.)
    y = np.maximum(y, 0.)
    return y


def tanh(x):
    """Element-wise hyperbolic tangent."""
    return np.tanh(x)


def softmax(x, axis=-1):
    """Softmax along `axis`, shifted by the maximum before exponentiating."""
    y = np.exp(x - np.max(x, axis, keepdims=True))
    return y / np.sum(y, axis, keepdims=True)


def l2_normalize(x, axis=-1):
    """Divide `x` by the square root of its squared sum along `axis`."""
    # NOTE(review): for an integer `axis` the outer np.max reduces an axis
    # that already has length 1, so an all-zero slice divides by zero.
    # Possibly `np.maximum(..., epsilon)` was intended; confirm.
    y = np.max(np.sum(x ** 2, axis, keepdims=True), axis, keepdims=True)
    return x / np.sqrt(y)


def binary_crossentropy(target, output, from_logits=False):
    """Element-wise binary cross-entropy.

    # Arguments
        target: array of targets.
        output: probabilities, or logits when `from_logits` is true.
        from_logits: when false, `output` is clipped to
            `[1e-7, 1 - 1e-7]` and converted to logits first.

    # Returns
        Array of the same shape as the broadcast inputs.
    """
    if not from_logits:
        output = np.clip(output, 1e-7, 1 - 1e-7)
        output = np.log(output / (1 - output))
    return (target * -np.log(sigmoid(output)) +
            (1 - target) * -np.log(1 - sigmoid(output)))


def categorical_crossentropy(target, output, from_logits=False):
    """Categorical cross-entropy along the last axis.

    # Arguments
        target: array of targets.
        output: probabilities, or logits when `from_logits` is true.
            The caller's array is left unmodified.
        from_logits: when true `output` goes through `softmax`;
            otherwise it is rescaled to sum to 1 along the last axis.

    # Returns
        Array with the last axis reduced.
    """
    if from_logits:
        output = softmax(output)
    else:
        # Out-of-place division: `/=` rewrote the caller's array and
        # fails for integer inputs.
        output = output / output.sum(axis=-1, keepdims=True)
    output = np.clip(output, 1e-7, 1 - 1e-7)
    return np.sum(target * -np.log(output), axis=-1, keepdims=False)


def max(x, axis=None, keepdims=False):
    """`np.max`, additionally accepting `axis` as a list."""
    if isinstance(axis, list):
        axis = tuple(axis)
    return np.max(x, axis=axis, keepdims=keepdims)


def min(x, axis=None, keepdims=False):
    """`np.min`, additionally accepting `axis` as a list."""
    if isinstance(axis, list):
        axis = tuple(axis)
    return np.min(x, axis=axis, keepdims=keepdims)


def mean(x, axis=None, keepdims=False):
    """`np.mean`, additionally accepting `axis` as a list."""
    if isinstance(axis, list):
        axis = tuple(axis)
    return np.mean(x, axis=axis, keepdims=keepdims)


def var(x, axis=None, keepdims=False):
    """`np.var`, additionally accepting `axis` as a list."""
    if isinstance(axis, list):
        axis = tuple(axis)
    return np.var(x, axis=axis, keepdims=keepdims)


def std(x, axis=None, keepdims=False):
    """`np.std`, additionally accepting `axis` as a list."""
    if isinstance(axis, list):
        axis = tuple(axis)
    return np.std(x, axis=axis, keepdims=keepdims)


def logsumexp(x, axis=None, keepdims=False):
    """Log of the sum of exponentials, accepting `axis` as a list."""
    # `scipy.misc.logsumexp` is gone from newer SciPy releases, and
    # `import scipy` alone does not guarantee the submodule is loaded.
    from scipy.special import logsumexp as _scipy_logsumexp
    if isinstance(axis, list):
        axis = tuple(axis)
    return _scipy_logsumexp(x, axis=axis, keepdims=keepdims)


def sum(x, axis=None, keepdims=False):
    """`np.sum`, additionally accepting `axis` as a list."""
    if isinstance(axis, list):
        axis = tuple(axis)
    return np.sum(x, axis=axis, keepdims=keepdims)


def prod(x, axis=None, keepdims=False):
    """`np.prod`, additionally accepting `axis` as a list."""
    if isinstance(axis, list):
        axis = tuple(axis)
    return np.prod(x, axis=axis, keepdims=keepdims)


def cumsum(x, axis=0):
    """Cumulative sum along `axis`."""
    return np.cumsum(x, axis=axis)


def cumprod(x, axis=0):
    """Cumulative product along `axis`."""
    return np.cumprod(x, axis=axis)


def any(x, axis=None, keepdims=False):
    """`np.any`, additionally accepting `axis` as a list."""
    if isinstance(axis, list):
        axis = tuple(axis)
    return np.any(x, axis=axis, keepdims=keepdims)
def any(x, axis=None, keepdims=False):
    """`np.any`, additionally accepting `axis` as a list."""
    if isinstance(axis, list):
        axis = tuple(axis)
    return np.any(x, axis=axis, keepdims=keepdims)


def all(x, axis=None, keepdims=False):
    """`np.all`, additionally accepting `axis` as a list."""
    if isinstance(axis, list):
        axis = tuple(axis)
    return np.all(x, axis=axis, keepdims=keepdims)


def argmax(x, axis=-1):
    """Index of the maximum along `axis`."""
    return np.argmax(x, axis=axis)


def argmin(x, axis=-1):
    """Index of the minimum along `axis`."""
    return np.argmin(x, axis=axis)


def sqrt(x):
    """Element-wise square root with NaN results replaced by 0."""
    y = np.sqrt(x)
    y[np.isnan(y)] = 0.
    return y


def pow(x, a=1.):
    """Element-wise `x ** a`."""
    return np.power(x, a)


def clip(x, min_value, max_value):
    """Limit `x` to `[min_value, max_value]`."""
    return np.clip(x, min_value, max_value)


def concatenate(tensors, axis=-1):
    """Concatenate `tensors` along `axis`."""
    return np.concatenate(tensors, axis)


def permute_dimensions(x, pattern):
    """Transpose `x` according to `pattern`."""
    return np.transpose(x, pattern)


def reshape(x, shape):
    """Reshape `x` to `shape`."""
    return np.reshape(x, shape)


def repeat_elements(x, rep, axis):
    """Repeat every element of `x` `rep` times along `axis`."""
    return np.repeat(x, rep, axis=axis)


def repeat(x, n):
    """Insert a new axis 1 and repeat `x` `n` times along it."""
    y = np.expand_dims(x, 1)
    y = np.repeat(y, n, axis=1)
    return y


def tile(x, n):
    """Tile `x` according to `n`."""
    return np.tile(x, n)


def arange(start, stop=None, step=1, dtype='int32'):
    """`np.arange` with an `'int32'` default dtype."""
    return np.arange(start, stop, step, dtype)


def flatten(x):
    """Flatten `x` to one dimension."""
    return np.reshape(x, (-1,))


def batch_flatten(x):
    """Flatten every axis of `x` except the first."""
    return np.reshape(x, (x.shape[0], -1))


def eval(x):
    """Return `x` unchanged; NumPy values need no evaluation."""
    return x


def dtype(x):
    """Return the name of `x`'s dtype as a string."""
    return x.dtype.name


def constant(value, dtype=None, shape=None, name=None):
    """Create a constant of the requested dtype and shape.

    # Arguments
        value: scalar (or broadcastable) fill value.
        dtype: target dtype; defaults to `floatx()`.
        shape: shape of the result; defaults to `()`.
        name: accepted for API compatibility; not used.

    # Returns
        `value * np.ones(shape)` converted to `dtype`.
    """
    if dtype is None:
        dtype = floatx()
    if shape is None:
        shape = ()
    np_value = value * np.ones(shape)
    # `astype` returns a new object; the result used to be discarded, so
    # the requested dtype was never applied.
    return np_value.astype(dtype)


def print_tensor(x, message=''):
    """Print `x` followed by `message` and return `x`."""
    print(x, message)
    return x


def dot(x, y):
    """`np.dot` of `x` and `y`."""
    return np.dot(x, y)


def transpose(x):
    """Reverse the axes of `x`."""
    return np.transpose(x)


def reverse(x, axes):
    """Flip `x` along one axis (integer) or several axes (iterable)."""
    if isinstance(axes, int):
        axes = [axes]
    for a in axes:
        x = np.flip(x, a)
    return x


def variable(value, dtype=None, name=None, constraint=None):
    """Create an array from `value`.

    # Arguments
        value: array-like initial value.
        dtype: forwarded to `np.array`.
        name: accepted for API compatibility; not used.
        constraint: must be `None`.

    # Raises
        TypeError: if `constraint` is not `None`.
    """
    if constraint is not None:
        raise TypeError("Constraint must be None when "
                        "using the NumPy backend.")
    return np.array(value, dtype)


def equal(x, y):
    """Element-wise `x == y`."""
    return x == y


def not_equal(x, y):
    """Element-wise `x != y`."""
    return x != y


def greater(x, y):
    """Element-wise `x > y`."""
    return x > y


def greater_equal(x, y):
    """Element-wise `x >= y`."""
    return x >= y


def less(x, y):
    """Element-wise `x < y`."""
    return x < y


def less_equal(x, y):
    """Element-wise `x <= y`."""
    return x <= y


def maximum(x, y):
    """Element-wise maximum of `x` and `y`."""
    return np.maximum(x, y)
maximum(x, y): return np.maximum(x, y) def minimum(x, y): return np.minimum(x, y) def ndim(x): return x.ndim def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None): return (high - low) * np.random.random(shape).astype(dtype) + low def random_normal_variable(shape, mean, scale, dtype=None, name=None, seed=None): return scale * np.random.randn(*shape).astype(dtype) + mean def zeros(shape, dtype=floatx(), name=None): return np.zeros(shape, dtype=dtype) def zeros_like(x, dtype=floatx(), name=None): return np.zeros_like(x, dtype=dtype) def ones(shape, dtype=floatx(), name=None): return np.ones(shape, dtype=dtype) def ones_like(x, dtype=floatx(), name=None): return np.ones_like(x, dtype=dtype) def eye(size, dtype=None, name=None): return np.eye(size, dtype=dtype) def resize_images(x, height_factor, width_factor, data_format): if data_format == 'channels_first': x = repeat_elements(x, height_factor, axis=2) x = repeat_elements(x, width_factor, axis=3) elif data_format == 'channels_last': x = repeat_elements(x, height_factor, axis=1) x = repeat_elements(x, width_factor, axis=2) return x def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): if data_format == 'channels_first': x = repeat_elements(x, depth_factor, axis=2) x = repeat_elements(x, height_factor, axis=3) x = repeat_elements(x, width_factor, axis=4) elif data_format == 'channels_last': x = repeat_elements(x, depth_factor, axis=1) x = repeat_elements(x, height_factor, axis=2) x = repeat_elements(x, width_factor, axis=3) return x square = np.square abs = np.abs exp = np.exp log = np.log round = np.round sign = np.sign expand_dims = np.expand_dims squeeze = np.squeeze cos = np.cos sin = np.sin Keras-2.2.4/tests/keras/metrics_test.py0000644000000000116100000002111513354530144017651 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose import keras from keras import metrics from keras import backend as K all_metrics = [ 
metrics.binary_accuracy, metrics.categorical_accuracy, metrics.mean_squared_error, metrics.mean_absolute_error, metrics.mean_absolute_percentage_error, metrics.mean_squared_logarithmic_error, metrics.squared_hinge, metrics.hinge, metrics.categorical_crossentropy, metrics.binary_crossentropy, metrics.poisson, metrics.cosine_proximity, metrics.logcosh, ] all_sparse_metrics = [ metrics.sparse_categorical_accuracy, metrics.sparse_categorical_crossentropy, ] def test_metrics(): y_a = K.variable(np.random.random((6, 7))) y_b = K.variable(np.random.random((6, 7))) for metric in all_metrics: output = metric(y_a, y_b) print(metric.__name__) assert K.eval(output).shape == (6,) def test_sparse_metrics(): for metric in all_sparse_metrics: y_a = K.variable(np.random.randint(0, 7, (6,)), dtype=K.floatx()) y_b = K.variable(np.random.random((6, 7)), dtype=K.floatx()) assert K.eval(metric(y_a, y_b)).shape == (6,) def test_sparse_categorical_accuracy_correctness(): y_a = K.variable(np.random.randint(0, 7, (6,)), dtype=K.floatx()) y_b = K.variable(np.random.random((6, 7)), dtype=K.floatx()) # use one_hot embedding to convert sparse labels to equivalent dense labels y_a_dense_labels = K.cast(K.one_hot(K.cast(y_a, dtype='int32'), num_classes=7), dtype=K.floatx()) sparse_categorical_acc = metrics.sparse_categorical_accuracy(y_a, y_b) categorical_acc = metrics.categorical_accuracy(y_a_dense_labels, y_b) assert np.allclose(K.eval(sparse_categorical_acc), K.eval(categorical_acc)) def test_serialize(): '''This is a mock 'round trip' of serialize and deserialize. 
''' class MockMetric: def __init__(self): self.__name__ = "mock_metric" mock = MockMetric() found = metrics.serialize(mock) assert found == "mock_metric" found = metrics.deserialize('mock_metric', custom_objects={'mock_metric': True}) assert found is True def test_invalid_get(): with pytest.raises(ValueError): metrics.get(5) @pytest.mark.skipif((K.backend() == 'cntk'), reason='CNTK backend does not support top_k yet') def test_top_k_categorical_accuracy(): y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]])) success_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, k=3)) assert success_result == 1 partial_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, k=2)) assert partial_result == 0.5 failure_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, k=1)) assert failure_result == 0 @pytest.mark.skipif((K.backend() == 'cntk'), reason='CNTK backend does not support top_k yet') @pytest.mark.parametrize('y_pred, y_true', [ # Test correctness if the shape of y_true is (num_samples, 1) (np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]), np.array([[1], [0]])), # Test correctness if the shape of y_true is (num_samples,) (np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]), np.array([1, 0])), ]) def test_sparse_top_k_categorical_accuracy(y_pred, y_true): y_pred = K.variable(y_pred) y_true = K.variable(y_true) success_result = K.eval( metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3)) assert success_result == 1 partial_result = K.eval( metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2)) assert partial_result == 0.5 failure_result = K.eval( metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)) assert failure_result == 0 @pytest.mark.parametrize('metrics_mode', ['list', 'dict']) def test_stateful_metrics(metrics_mode): np.random.seed(1334) class BinaryTruePositives(keras.layers.Layer): """Stateful Metric to count the total true positives over 
all batches. Assumes predictions and targets of shape `(samples, 1)`. # Arguments name: String, name for the metric. """ def __init__(self, name='true_positives', **kwargs): super(BinaryTruePositives, self).__init__(name=name, **kwargs) self.stateful = True self.true_positives = K.variable(value=0, dtype='int32') def reset_states(self): K.set_value(self.true_positives, 0) def __call__(self, y_true, y_pred): """Computes the number of true positives in a batch. # Arguments y_true: Tensor, batch_wise labels y_pred: Tensor, batch_wise predictions # Returns The total number of true positives seen this epoch at the completion of the batch. """ y_true = K.cast(y_true, 'int32') y_pred = K.cast(K.round(y_pred), 'int32') correct_preds = K.cast(K.equal(y_pred, y_true), 'int32') true_pos = K.cast(K.sum(correct_preds * y_true), 'int32') current_true_pos = self.true_positives * 1 self.add_update(K.update_add(self.true_positives, true_pos), inputs=[y_true, y_pred]) return current_true_pos + true_pos metric_fn = BinaryTruePositives() config = metrics.serialize(metric_fn) metric_fn = metrics.deserialize( config, custom_objects={'BinaryTruePositives': BinaryTruePositives}) # Test on simple model inputs = keras.Input(shape=(2,)) outputs = keras.layers.Dense(1, activation='sigmoid', name='out')(inputs) model = keras.Model(inputs, outputs) if metrics_mode == 'list': model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['acc', metric_fn]) elif metrics_mode == 'dict': model.compile(optimizer='sgd', loss='binary_crossentropy', metrics={'out': ['acc', metric_fn]}) samples = 1000 x = np.random.random((samples, 2)) y = np.random.randint(2, size=(samples, 1)) val_samples = 10 val_x = np.random.random((val_samples, 2)) val_y = np.random.randint(2, size=(val_samples, 1)) # Test fit and evaluate history = model.fit(x, y, validation_data=(val_x, val_y), epochs=1, batch_size=10) outs = model.evaluate(x, y, batch_size=10) preds = model.predict(x) def ref_true_pos(y_true, y_pred): 
return np.sum(np.logical_and(y_pred > 0.5, y_true == 1)) # Test correctness (e.g. updates should have been run) np.testing.assert_allclose(outs[2], ref_true_pos(y, preds), atol=1e-5) # Test correctness of the validation metric computation val_preds = model.predict(val_x) val_outs = model.evaluate(val_x, val_y, batch_size=10) assert_allclose(val_outs[2], ref_true_pos(val_y, val_preds), atol=1e-5) assert_allclose(val_outs[2], history.history['val_true_positives'][-1], atol=1e-5) # Test with generators gen = [(np.array([x0]), np.array([y0])) for x0, y0 in zip(x, y)] val_gen = [(np.array([x0]), np.array([y0])) for x0, y0 in zip(val_x, val_y)] history = model.fit_generator(iter(gen), epochs=1, steps_per_epoch=samples, validation_data=iter(val_gen), validation_steps=val_samples) outs = model.evaluate_generator(iter(gen), steps=samples, workers=0) preds = model.predict_generator(iter(gen), steps=samples, workers=0) # Test correctness of the metric re ref_true_pos() np.testing.assert_allclose(outs[2], ref_true_pos(y, preds), atol=1e-5) # Test correctness of the validation metric computation val_preds = model.predict_generator(iter(val_gen), steps=val_samples, workers=0) val_outs = model.evaluate_generator(iter(val_gen), steps=val_samples, workers=0) np.testing.assert_allclose(val_outs[2], ref_true_pos(val_y, val_preds), atol=1e-5) np.testing.assert_allclose(val_outs[2], history.history['val_true_positives'][-1], atol=1e-5) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/test_sequential_model.py0000644000000000116100000003464313354530144021547 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import print_function import pytest import os import numpy as np from numpy.testing import assert_allclose from keras import backend as K import keras from keras.models import Sequential from keras.layers import Dense, Activation from keras.utils import np_utils from keras.utils.test_utils import get_test_data from 
keras.models import model_from_json, model_from_yaml from keras import losses from keras.engine.training_utils import make_batches input_dim = 16 num_hidden = 8 num_classes = 4 batch_size = 32 epochs = 1 @pytest.fixture def in_tmpdir(tmpdir): """Runs a function in a temporary directory. Checks that the directory is empty afterwards. """ with tmpdir.as_cwd(): yield None assert not tmpdir.listdir() def test_sequential_pop(): model = Sequential() model.add(Dense(num_hidden, input_dim=input_dim)) model.add(Dense(num_classes)) model.compile(loss='mse', optimizer='sgd') x = np.random.random((batch_size, input_dim)) y = np.random.random((batch_size, num_classes)) model.fit(x, y, epochs=1) model.pop() assert len(model.layers) == 1 assert model.output_shape == (None, num_hidden) model.compile(loss='mse', optimizer='sgd') y = np.random.random((batch_size, num_hidden)) model.fit(x, y, epochs=1) def _get_test_data(): np.random.seed(1234) train_samples = 100 test_samples = 50 (x_train, y_train), (x_test, y_test) = get_test_data(num_train=train_samples, num_test=test_samples, input_shape=(input_dim,), classification=True, num_classes=num_classes) y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) return (x_train, y_train), (x_test, y_test) def test_sequential_fit_generator(): (x_train, y_train), (x_test, y_test) = _get_test_data() def data_generator(train): if train: max_batch_index = len(x_train) // batch_size else: max_batch_index = len(x_test) // batch_size i = 0 while 1: if train: yield (x_train[i * batch_size: (i + 1) * batch_size], y_train[i * batch_size: (i + 1) * batch_size]) else: yield (x_test[i * batch_size: (i + 1) * batch_size], y_test[i * batch_size: (i + 1) * batch_size]) i += 1 i = i % max_batch_index model = Sequential() model.add(Dense(num_hidden, input_shape=(input_dim,))) model.add(Activation('relu')) model.add(Dense(num_classes)) model.pop() model.add(Dense(num_classes)) model.add(Activation('softmax')) 
model.compile(loss='categorical_crossentropy', optimizer='rmsprop') model.fit_generator(data_generator(True), 5, epochs) model.fit_generator(data_generator(True), 5, epochs, validation_data=(x_test, y_test)) model.fit_generator(data_generator(True), 5, epochs, validation_data=data_generator(False), validation_steps=3) model.fit_generator(data_generator(True), 5, epochs, max_queue_size=2) model.evaluate(x_train, y_train) def test_sequential(in_tmpdir): (x_train, y_train), (x_test, y_test) = _get_test_data() # TODO: factor out def data_generator(x, y, batch_size=50): index_array = np.arange(len(x)) while 1: batches = make_batches(len(x_test), batch_size) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] x_batch = x[batch_ids] y_batch = y[batch_ids] yield (x_batch, y_batch) model = Sequential() model.add(Dense(num_hidden, input_shape=(input_dim,))) model.add(Activation('relu')) model.add(Dense(num_classes)) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2, validation_split=0.1) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, shuffle=False) model.train_on_batch(x_train[:32], y_train[:32]) loss = model.evaluate(x_test, y_test) prediction = model.predict_generator(data_generator(x_test, y_test), 1, max_queue_size=2, verbose=1) gen_loss = model.evaluate_generator(data_generator(x_test, y_test, 50), 1, max_queue_size=2) pred_loss = K.eval(K.mean(losses.get(model.loss)(K.variable(y_test), K.variable(prediction)))) assert(np.isclose(pred_loss, loss)) assert(np.isclose(gen_loss, loss)) model.predict(x_test, verbose=0) model.predict_classes(x_test, verbose=0) 
model.predict_proba(x_test, verbose=0) fname = 'test_sequential_temp.h5' model.save_weights(fname, overwrite=True) model = Sequential() model.add(Dense(num_hidden, input_shape=(input_dim,))) model.add(Activation('relu')) model.add(Dense(num_classes)) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') model.load_weights(fname) os.remove(fname) nloss = model.evaluate(x_test, y_test, verbose=0) assert(loss == nloss) # Test serialization config = model.get_config() assert 'name' in config new_model = Sequential.from_config(config) assert new_model.weights # Model should be built. model.summary() json_str = model.to_json() model_from_json(json_str) yaml_str = model.to_yaml() model_from_yaml(yaml_str) def test_nested_sequential(in_tmpdir): (x_train, y_train), (x_test, y_test) = _get_test_data() inner = Sequential() inner.add(Dense(num_hidden, input_shape=(input_dim,))) inner.add(Activation('relu')) inner.add(Dense(num_classes)) middle = Sequential() middle.add(inner) model = Sequential() model.add(middle) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2, validation_split=0.1) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, shuffle=False) model.train_on_batch(x_train[:32], y_train[:32]) loss = model.evaluate(x_test, y_test, verbose=0) model.predict(x_test, verbose=0) model.predict_classes(x_test, verbose=0) model.predict_proba(x_test, verbose=0) fname = 'test_nested_sequential_temp.h5' model.save_weights(fname, overwrite=True) inner = Sequential() inner.add(Dense(num_hidden, input_shape=(input_dim,))) inner.add(Activation('relu')) inner.add(Dense(num_classes)) middle = Sequential() 
middle.add(inner) model = Sequential() model.add(middle) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') model.load_weights(fname) os.remove(fname) nloss = model.evaluate(x_test, y_test, verbose=0) assert(loss == nloss) # Test serialization config = model.get_config() Sequential.from_config(config) model.summary() json_str = model.to_json() model_from_json(json_str) yaml_str = model.to_yaml() model_from_yaml(yaml_str) def test_sequential_count_params(): input_dim = 20 num_units = 10 num_classes = 2 n = input_dim * num_units + num_units n += num_units * num_units + num_units n += num_units * num_classes + num_classes model = Sequential() model.add(Dense(num_units, input_shape=(input_dim,))) model.add(Dense(num_units)) model.add(Dense(num_classes)) model.add(Activation('softmax')) model.build() assert(n == model.count_params()) model.compile('sgd', 'binary_crossentropy') assert(n == model.count_params()) def test_nested_sequential_trainability(): input_dim = 20 num_units = 10 num_classes = 2 inner_model = Sequential() inner_model.add(Dense(num_units, input_shape=(input_dim,))) model = Sequential() model.add(inner_model) model.add(Dense(num_classes)) assert len(model.trainable_weights) == 4 inner_model.trainable = False assert len(model.trainable_weights) == 2 inner_model.trainable = True assert len(model.trainable_weights) == 4 def test_rebuild_model(): model = Sequential() model.add(Dense(128, input_shape=(784,))) model.add(Dense(64)) assert(model.get_layer(index=-1).output_shape == (None, 64)) model.add(Dense(32)) assert(model.get_layer(index=-1).output_shape == (None, 32)) def test_clone_functional_model(): val_a = np.random.random((10, 4)) val_b = np.random.random((10, 4)) val_out = np.random.random((10, 4)) input_a = keras.Input(shape=(4,)) input_b = keras.Input(shape=(4,)) dense_1 = keras.layers.Dense(4) dense_2 = keras.layers.Dense(4) x_a = dense_1(input_a) x_a = keras.layers.Dropout(0.5)(x_a) x_a = 
keras.layers.BatchNormalization()(x_a) x_b = dense_1(input_b) x_a = dense_2(x_a) outputs = keras.layers.add([x_a, x_b]) model = keras.models.Model([input_a, input_b], outputs) if K.backend() == 'tensorflow': # Everything should work in a new session. K.clear_session() # With placeholder creation new_model = keras.models.clone_model(model) new_model.compile('rmsprop', 'mse') new_model.train_on_batch([val_a, val_b], val_out) # On top of new tensors input_a = keras.Input(shape=(4,), name='a') input_b = keras.Input(shape=(4,), name='b') new_model = keras.models.clone_model( model, input_tensors=[input_a, input_b]) new_model.compile('rmsprop', 'mse') new_model.train_on_batch([val_a, val_b], val_out) # On top of new, non-Keras tensors input_a = keras.backend.variable(val_a) input_b = keras.backend.variable(val_b) new_model = keras.models.clone_model( model, input_tensors=[input_a, input_b]) new_model.compile('rmsprop', 'mse') new_model.train_on_batch(None, val_out) def test_clone_sequential_model(): val_a = np.random.random((10, 4)) val_out = np.random.random((10, 4)) model = keras.models.Sequential() model.add(keras.layers.Dense(4, input_shape=(4,))) model.add(keras.layers.BatchNormalization()) model.add(keras.layers.Dropout(0.5)) model.add(keras.layers.Dense(4)) if K.backend() == 'tensorflow': # Everything should work in a new session. 
K.clear_session() # With placeholder creation new_model = keras.models.clone_model(model) new_model.compile('rmsprop', 'mse') new_model.train_on_batch(val_a, val_out) # On top of new tensor input_a = keras.Input(shape=(4,)) new_model = keras.models.clone_model( model, input_tensors=input_a) new_model.compile('rmsprop', 'mse') new_model.train_on_batch(val_a, val_out) # On top of new, non-Keras tensor input_a = keras.backend.variable(val_a) new_model = keras.models.clone_model( model, input_tensors=input_a) new_model.compile('rmsprop', 'mse') new_model.train_on_batch(None, val_out) def test_sequential_update_disabling(): val_a = np.random.random((10, 4)) val_out = np.random.random((10, 4)) model = keras.models.Sequential() model.add(keras.layers.BatchNormalization(input_shape=(4,))) model.trainable = False assert not model.updates model.compile('sgd', 'mse') assert not model.updates x1 = model.predict(val_a) model.train_on_batch(val_a, val_out) x2 = model.predict(val_a) assert_allclose(x1, x2, atol=1e-7) model.trainable = True model.compile('sgd', 'mse') assert model.updates model.train_on_batch(val_a, val_out) x2 = model.predict(val_a) assert np.abs(np.sum(x1 - x2)) > 1e-5 def test_sequential_deferred_build(): model = keras.models.Sequential() model.add(keras.layers.Dense(3)) model.add(keras.layers.Dense(3)) model.compile('sgd', 'mse') assert model.built is False assert len(model.layers) == 2 assert len(model.weights) == 0 model.train_on_batch( np.random.random((2, 4)), np.random.random((2, 3))) assert model.built is True assert len(model.layers) == 2 assert len(model.weights) == 4 # Test serialization config = model.get_config() assert 'name' in config new_model = Sequential.from_config(config) assert new_model.built is True assert len(new_model.layers) == 2 assert len(new_model.weights) == 4 def test_nested_sequential_deferred_build(): inner_model = keras.models.Sequential() inner_model.add(keras.layers.Dense(3)) inner_model.add(keras.layers.Dense(3)) model = 
keras.models.Sequential() model.add(inner_model) model.add(keras.layers.Dense(5)) model.compile('sgd', 'mse') assert inner_model.built is False assert len(inner_model.layers) == 2 assert len(inner_model.weights) == 0 assert model.built is False assert len(model.layers) == 2 assert len(model.weights) == 0 model.train_on_batch( np.random.random((2, 4)), np.random.random((2, 5))) assert inner_model.built is True assert len(inner_model.layers) == 2 assert len(inner_model.weights) == 4 assert model.built is True assert len(model.layers) == 2 assert len(model.weights) == 6 config = model.get_config() new_model = keras.models.Sequential.from_config(config) assert new_model.built is True assert len(new_model.layers) == 2 assert len(new_model.weights) == 6 new_inner_model = new_model.layers[0] assert new_inner_model.built is True assert len(new_inner_model.layers) == 2 assert len(new_inner_model.weights) == 4 if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/preprocessing/0000755000000000116100000000000013355226624017463 5ustar rooteng00000000000000Keras-2.2.4/tests/keras/losses_test.py0000644000000000116100000001155513210317577017526 0ustar rooteng00000000000000import pytest import numpy as np import keras from keras import losses from keras import backend as K from keras.utils.generic_utils import custom_object_scope allobj = [losses.mean_squared_error, losses.mean_absolute_error, losses.mean_absolute_percentage_error, losses.mean_squared_logarithmic_error, losses.squared_hinge, losses.hinge, losses.categorical_crossentropy, losses.binary_crossentropy, losses.kullback_leibler_divergence, losses.poisson, losses.cosine_proximity, losses.logcosh, losses.categorical_hinge] def test_objective_shapes_3d(): y_a = K.variable(np.random.random((5, 6, 7))) y_b = K.variable(np.random.random((5, 6, 7))) for obj in allobj: objective_output = obj(y_a, y_b) assert K.eval(objective_output).shape == (5, 6) def test_objective_shapes_2d(): y_a = 
K.variable(np.random.random((6, 7))) y_b = K.variable(np.random.random((6, 7))) for obj in allobj: objective_output = obj(y_a, y_b) assert K.eval(objective_output).shape == (6,) def test_cce_one_hot(): y_a = K.variable(np.random.randint(0, 7, (5, 6))) y_b = K.variable(np.random.random((5, 6, 7))) objective_output = losses.sparse_categorical_crossentropy(y_a, y_b) assert K.eval(objective_output).shape == (5, 6) y_a = K.variable(np.random.randint(0, 7, (6,))) y_b = K.variable(np.random.random((6, 7))) assert K.eval(losses.sparse_categorical_crossentropy(y_a, y_b)).shape == (6,) def test_categorical_hinge(): y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]])) expected_loss = ((0.3 - 0.2 + 1) + (0.7 - 0.1 + 1)) / 2.0 loss = K.eval(losses.categorical_hinge(y_true, y_pred)) assert np.isclose(expected_loss, np.mean(loss)) def test_sparse_categorical_crossentropy(): y_pred = K.variable(np.array([[0.3, 0.6, 0.1], [0.1, 0.2, 0.7]])) y_true = K.variable(np.array([1, 2])) expected_loss = - (np.log(0.6) + np.log(0.7)) / 2 loss = K.eval(losses.sparse_categorical_crossentropy(y_true, y_pred)) assert np.isclose(expected_loss, np.mean(loss)) def test_sparse_categorical_crossentropy_4d(): y_pred = K.variable(np.array([[[[0.7, 0.1, 0.2], [0.0, 0.3, 0.7], [0.1, 0.1, 0.8]], [[0.3, 0.7, 0.0], [0.3, 0.4, 0.3], [0.2, 0.5, 0.3]], [[0.8, 0.1, 0.1], [1.0, 0.0, 0.0], [0.4, 0.3, 0.3]]]])) y_true = K.variable(np.array([[[0, 1, 0], [2, 1, 0], [2, 2, 1]]])) expected_loss = - (np.log(0.7) + np.log(0.3) + np.log(0.1) + np.log(K.epsilon()) + np.log(0.4) + np.log(0.2) + np.log(0.1) + np.log(K.epsilon()) + np.log(0.3)) / 9 loss = K.eval(losses.sparse_categorical_crossentropy(y_true, y_pred)) assert np.isclose(expected_loss, np.mean(loss)) class MSE_MAE_loss: """Loss function with internal state, for testing serialization code.""" def __init__(self, mse_fraction): self.mse_fraction = mse_fraction def __call__(self, y_true, y_pred): 
return (self.mse_fraction * losses.mse(y_true, y_pred) + (1 - self.mse_fraction) * losses.mae(y_true, y_pred)) def get_config(self): return {'mse_fraction': self.mse_fraction} def test_serializing_loss_class(): orig_loss_class = MSE_MAE_loss(0.3) with custom_object_scope({'MSE_MAE_loss': MSE_MAE_loss}): serialized = losses.serialize(orig_loss_class) with custom_object_scope({'MSE_MAE_loss': MSE_MAE_loss}): deserialized = losses.deserialize(serialized) assert isinstance(deserialized, MSE_MAE_loss) assert deserialized.mse_fraction == 0.3 def test_serializing_model_with_loss_class(tmpdir): model_filename = str(tmpdir / 'custom_loss.hdf') with custom_object_scope({'MSE_MAE_loss': MSE_MAE_loss}): loss = MSE_MAE_loss(0.3) inputs = keras.layers.Input((2,)) outputs = keras.layers.Dense(1, name='model_output')(inputs) model = keras.models.Model(inputs, outputs) model.compile(optimizer='sgd', loss={'model_output': loss}) model.fit(np.random.rand(256, 2), np.random.rand(256, 1)) model.save(model_filename) with custom_object_scope({'MSE_MAE_loss': MSE_MAE_loss}): loaded_model = keras.models.load_model(model_filename) loaded_model.predict(np.random.rand(128, 2)) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/engine/0000755000000000116100000000000013355226624016045 5ustar rooteng00000000000000Keras-2.2.4/tests/keras/engine/test_training.py0000644000000000116100000016070713355226611021300 0ustar rooteng00000000000000import threading import pytest import numpy as np import pandas as pd from numpy.testing import assert_allclose import sys import scipy.sparse as sparse import keras from keras import losses from keras.layers import Activation, Dense, Dropout, Conv2D, Concatenate from keras.engine import Input from keras.engine.training import Model from keras.engine import training_utils from keras.utils.generic_utils import slice_arrays from keras.models import Sequential from keras import backend as K from keras.utils import Sequence from keras.callbacks 
import LambdaCallback class RandomSequence(Sequence): def __init__(self, batch_size, sequence_length=12): self.batch_size = batch_size self.sequence_length = sequence_length self.logs = [] # It will work for use_multiprocessing=False def __len__(self): return self.sequence_length def __getitem__(self, idx): self.logs.append(idx) return ([np.random.random((self.batch_size, 3)), np.random.random((self.batch_size, 3))], [np.random.random((self.batch_size, 4)), np.random.random((self.batch_size, 3))]) def on_epoch_end(self): pass class threadsafe_iter: """Takes an iterator/generator and makes it thread-safe by serializing call to the `next` method of given iterator/generator. """ def __init__(self, it): self.it = it self.lock = threading.Lock() def __iter__(self): return self def __next__(self): return self.next() def next(self): with self.lock: return next(self.it) def threadsafe_generator(f): """A decorator that takes a generator function and makes it thread-safe. """ def g(*a, **kw): return threadsafe_iter(f(*a, **kw)) return g def test_check_array_length_consistency(): training_utils.check_array_length_consistency(None, None, None) a_np = np.random.random((4, 3, 3)) training_utils.check_array_length_consistency(a_np, a_np, a_np) training_utils.check_array_length_consistency( [a_np, a_np], [a_np, a_np], [a_np, a_np]) training_utils.check_array_length_consistency([None], [None], [None]) b_np = np.random.random((3, 4)) with pytest.raises(ValueError): training_utils.check_array_length_consistency(a_np, None, None) with pytest.raises(ValueError): training_utils.check_array_length_consistency(a_np, a_np, None) with pytest.raises(ValueError): training_utils.check_array_length_consistency([a_np], [None], None) with pytest.raises(ValueError): training_utils.check_array_length_consistency([a_np], [b_np], None) with pytest.raises(ValueError): training_utils.check_array_length_consistency([a_np], None, [b_np]) def testslice_arrays(): input_a = np.random.random((10, 3)) 
slice_arrays(None) slice_arrays(input_a, 0) slice_arrays(input_a, 0, 1) slice_arrays(input_a, stop=2) input_a = [None, [1, 1], None, [1, 1]] slice_arrays(input_a, 0) slice_arrays(input_a, 0, 1) slice_arrays(input_a, stop=2) input_a = [None] slice_arrays(input_a, 0) slice_arrays(input_a, 0, 1) slice_arrays(input_a, stop=2) input_a = None slice_arrays(input_a, 0) slice_arrays(input_a, 0, 1) slice_arrays(input_a, stop=2) def test_weighted_masked_objective(): a = Input(shape=(3,), name='input_a') # weighted_masked_objective def mask_dummy(y_true=None, y_pred=None, weight=None): return K.placeholder(y_true.shape) weighted_function = training_utils.weighted_masked_objective( losses.categorical_crossentropy) weighted_function(a, a, None) def test_model_methods(): a = Input(shape=(3,), name='input_a') b = Input(shape=(3,), name='input_b') a_2 = Dense(4, name='dense_1')(a) dp = Dropout(0.5, name='dropout') b_2 = dp(b) model = Model([a, b], [a_2, b_2]) optimizer = 'rmsprop' loss = 'mse' loss_weights = [1., 0.5] input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) # training/testing doesn't work before compiling. 
with pytest.raises(RuntimeError): model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None) # test train_on_batch out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]) out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}) # test fit out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4) out = model.fit({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np], epochs=1, batch_size=4) out = model.fit({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}, epochs=1, batch_size=4) # test validation_split out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4, validation_split=0.5) out = model.fit({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np], epochs=1, batch_size=4, validation_split=0.5) # test validation data out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4, validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np])) out = model.fit({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np], epochs=1, batch_size=4, validation_split=0.5, validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np])) out = model.fit({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}, epochs=1, batch_size=4, validation_split=0.5, validation_data=( {'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np})) # test_on_batch out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out = 
model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]) out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}) # predict_on_batch out = model.predict_on_batch([input_a_np, input_b_np]) out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np}) # predict, evaluate input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4) out = model.predict([input_a_np, input_b_np], batch_size=4) # with sample_weight input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) sample_weight = [None, np.random.random((10,))] out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=sample_weight) out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=sample_weight) # test accuracy metric model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 5 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 5 # this should also work model.compile(optimizer, loss, metrics={'dense_1': 'acc'}, sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 # and this as well model.compile(optimizer, loss, metrics={'dense_1': ['acc']}, sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 out = model.test_on_batch([input_a_np, 
input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 # test starting from non-zero initial epoch trained_epochs = [] trained_batches = [] # define tracer callback def on_epoch_begin(epoch, logs): trained_epochs.append(epoch) def on_batch_begin(batch, logs): trained_batches.append(batch) tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin, on_batch_begin=on_batch_begin) out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=5, batch_size=4, initial_epoch=2, callbacks=[tracker_cb]) assert trained_epochs == [2, 3, 4] # test starting from non-zero initial epoch for generator too trained_epochs = [] @threadsafe_generator def gen_data(batch_sz): while True: yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))], [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))]) out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5, initial_epoch=2, callbacks=[tracker_cb]) assert trained_epochs == [2, 3, 4] # test with a custom metric function def mse(y_true, y_pred): return K.mean(K.pow(y_true - y_pred, 2)) model.compile(optimizer, loss, metrics=[mse], sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out_len = 1 + 2 * (1 + 1) # total loss + 2 outputs * (loss + metric) assert len(out) == out_len out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == out_len input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, epochs=1) out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4) out = model.predict([input_a_np, input_b_np], batch_size=4) # enable verbose for evaluate_generator out = model.evaluate_generator(gen_data(4), steps=3, verbose=1) # empty batch with pytest.raises(ValueError): 
@threadsafe_generator def gen_data(): while True: yield (np.asarray([]), np.asarray([])) out = model.evaluate_generator(gen_data(), steps=1) # x is not a list of numpy arrays. with pytest.raises(ValueError): out = model.predict([None]) # x does not match _feed_input_names. with pytest.raises(ValueError): out = model.predict([input_a_np, None, input_b_np]) with pytest.raises(ValueError): out = model.predict([None, input_a_np, input_b_np]) # all input/output/weight arrays should have the same number of samples. with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np[:2]], [output_a_np, output_b_np], sample_weight=sample_weight) with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np[:2]], sample_weight=sample_weight) with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=[sample_weight[1], sample_weight[1][:2]]) # `sample_weight` is neither a dict nor a list. with pytest.raises(TypeError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=tuple(sample_weight)) # `validation_data` is neither a tuple nor a triple. with pytest.raises(ValueError): out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4, validation_data=([input_a_np, input_b_np],)) # `loss` does not match outputs. with pytest.raises(ValueError): model.compile(optimizer, loss=['mse', 'mae', 'mape']) # `loss_weights` does not match output_names. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5}) # `loss_weights` does not match outputs. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', loss_weights=[0.5]) # `loss_weights` is invalid type. with pytest.raises(TypeError): model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5)) # `sample_weight_mode` does not match output_names. 
with pytest.raises(ValueError): model.compile(optimizer, loss='mse', sample_weight_mode={'lstm': 'temporal'}) # `sample_weight_mode` does not match output_names. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', sample_weight_mode=['temporal']) # `sample_weight_mode` matches output_names partially. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': 'temporal'}) # `loss` does not exist. with pytest.raises(ValueError): model.compile(optimizer, loss=[]) model.compile(optimizer, loss=['mse', 'mae']) model.compile(optimizer, loss='mse', loss_weights={'dense_1': 0.2, 'dropout': 0.8}) model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8]) # the rank of weight arrays should be 1. with pytest.raises(ValueError): out = model.train_on_batch( [input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=[None, np.random.random((10, 20, 30))]) model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': None, 'dropout': 'temporal'}) model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal']) # the rank of output arrays should be at least 3D. 
with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=sample_weight) model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None) trained_epochs = [] trained_batches = [] val_seq = RandomSequence(4) out = model.fit_generator(generator=RandomSequence(3), steps_per_epoch=3, epochs=5, initial_epoch=0, validation_data=val_seq, validation_steps=3, max_queue_size=1, callbacks=[tracker_cb]) assert trained_epochs == [0, 1, 2, 3, 4] assert trained_batches == list(range(3)) * 5 assert len(val_seq.logs) <= 4 * 5 # steps_per_epoch will be equal to len of sequence if it's unspecified trained_epochs = [] trained_batches = [] val_seq = RandomSequence(4) out = model.fit_generator(generator=RandomSequence(3), epochs=5, initial_epoch=0, validation_data=val_seq, callbacks=[tracker_cb]) assert trained_epochs == [0, 1, 2, 3, 4] assert trained_batches == list(range(12)) * 5 assert len(val_seq.logs) == 12 * 5 # test for workers = 0 trained_epochs = [] trained_batches = [] val_seq = RandomSequence(4) out = model.fit_generator(generator=RandomSequence(3), epochs=5, validation_data=val_seq, callbacks=[tracker_cb], workers=0) assert trained_epochs == [0, 1, 2, 3, 4] assert trained_batches == list(range(12)) * 5 assert len(val_seq.logs) == 12 * 5 # fit_generator will throw an exception # if steps is unspecified for regular generator with pytest.raises(ValueError): @threadsafe_generator def gen_data(): while True: yield (np.asarray([]), np.asarray([])) out = model.fit_generator(generator=gen_data(), epochs=5, initial_epoch=0, validation_data=gen_data(), callbacks=[tracker_cb]) # Check if generator is only accessed an expected number of times gen_counters = [0, 0] @threadsafe_generator def gen_data(i): while True: gen_counters[i] += 1 yield ([np.random.random((1, 3)), np.random.random((1, 3))], [np.random.random((1, 4)), np.random.random((1, 3))]) out = 
model.fit_generator(generator=gen_data(0), epochs=3, steps_per_epoch=2, validation_data=gen_data(1), validation_steps=1, max_queue_size=2, workers=2) # Need range check here as filling # of the queue depends on sleep in the enqueuers max_train = 3 * 2 + 2 * 2 min_train = 2 * 3 assert min_train <= gen_counters[0] <= max_train # 12 = (epoch * workers * validation steps * max_queue_size) assert 3 <= gen_counters[1] <= 12 gen_counters = [0] out = model.fit_generator(generator=RandomSequence(3), epochs=3, validation_data=gen_data(0), validation_steps=1, max_queue_size=2, workers=2) # 12 = (epoch * workers * validation steps * max_queue_size) # Need range check here as filling # of the queue depends on sleep in the enqueuers assert 3 <= gen_counters[0] <= 12 # predict_generator output shape behavior should be consistent def expected_shape(batch_size, n_batches): return (batch_size * n_batches, 4), (batch_size * n_batches, 3) # Multiple outputs and one step. batch_size = 5 sequence_length = 1 shape_0, shape_1 = expected_shape(batch_size, sequence_length) out = model.predict_generator( RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1 # Multiple outputs and multiple steps. batch_size = 5 sequence_length = 2 shape_0, shape_1 = expected_shape(batch_size, sequence_length) out = model.predict_generator( RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1 # Create a model with a single output. single_output_model = Model([a, b], a_2) single_output_model.compile(optimizer, loss, metrics=[], sample_weight_mode=None) # Single output and one step. batch_size = 5 sequence_length = 1 shape_0, _ = expected_shape(batch_size, sequence_length) out = single_output_model.predict_generator( RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out) == shape_0 # Single output and multiple steps. 
batch_size = 5 sequence_length = 2 shape_0, _ = expected_shape(batch_size, sequence_length) out = single_output_model.predict_generator( RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out) == shape_0 @pytest.mark.skipif(sys.version_info < (3,), reason='Cannot catch warnings in python 2') def test_warnings(): a = Input(shape=(3,), name='input_a') b = Input(shape=(3,), name='input_b') a_2 = Dense(4, name='dense_1')(a) dp = Dropout(0.5, name='dropout') b_2 = dp(b) model = Model([a, b], [a_2, b_2]) optimizer = 'rmsprop' loss = 'mse' loss_weights = [1., 0.5] model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None) @threadsafe_generator def gen_data(batch_sz): while True: yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))], [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))]) with pytest.warns(Warning) as w: out = model.fit_generator(gen_data(4), steps_per_epoch=10, use_multiprocessing=True, workers=2) warning_raised = any(['Sequence' in str(w_.message) for w_ in w]) assert warning_raised, 'No warning raised when using generator with processes.' 
with pytest.warns(None) as w: out = model.fit_generator(RandomSequence(3), steps_per_epoch=4, use_multiprocessing=True, workers=2) assert all(['Sequence' not in str(w_.message) for w_ in w]), ( 'A warning was raised for Sequence.') def test_sparse_inputs_targets(): test_inputs = [sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)] test_outputs = [sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5)] in1 = Input(shape=(3,)) in2 = Input(shape=(3,)) out1 = Dropout(0.5, name='dropout')(in1) out2 = Dense(4, name='dense_1')(in2) model = Model([in1, in2], [out1, out2]) model.predict(test_inputs, batch_size=2) model.compile('rmsprop', 'mse') model.fit(test_inputs, test_outputs, epochs=1, batch_size=2, validation_split=0.5) model.evaluate(test_inputs, test_outputs, batch_size=2) @pytest.mark.skipif(K.backend() != 'tensorflow', reason='sparse operations supported only by TensorFlow') def test_sparse_placeholder_fit(): test_inputs = [sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)] test_outputs = [sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5)] in1 = Input(shape=(3,)) in2 = Input(shape=(3,), sparse=True) out1 = Dropout(0.5, name='dropout')(in1) out2 = Dense(4, name='dense_1')(in2) model = Model([in1, in2], [out1, out2]) model.predict(test_inputs, batch_size=2) model.compile('rmsprop', 'mse') model.fit(test_inputs, test_outputs, epochs=1, batch_size=2, validation_split=0.5) model.evaluate(test_inputs, test_outputs, batch_size=2) def test_trainable_argument(): x = np.random.random((5, 3)) y = np.random.random((5, 2)) model = Sequential() model.add(Dense(2, input_dim=3, trainable=False)) model.compile('rmsprop', 'mse') out = model.predict(x) model.train_on_batch(x, y) out_2 = model.predict(x) assert_allclose(out, out_2) # test with nesting inputs = Input(shape=(3,)) outputs = model(inputs) model = Model(inputs, outputs) model.compile('rmsprop', 'mse') out = model.predict(x) model.train_on_batch(x, y) out_2 = model.predict(x) 
assert_allclose(out, out_2) def test_with_list_as_targets(): model = Sequential() model.add(Dense(1, input_dim=3, trainable=False)) model.compile('rmsprop', 'mse') x = np.random.random((2, 3)) y = [0, 1] model.train_on_batch(x, y) def test_check_not_failing(): a = np.random.random((2, 1, 3)) training_utils.check_loss_and_target_compatibility( [a], [losses.categorical_crossentropy], [a.shape]) training_utils.check_loss_and_target_compatibility( [a], [losses.categorical_crossentropy], [(2, None, 3)]) def test_check_last_is_one(): a = np.random.random((2, 3, 1)) with pytest.raises(ValueError) as exc: training_utils.check_loss_and_target_compatibility( [a], [losses.categorical_crossentropy], [a.shape]) assert 'You are passing a target array' in str(exc) def test_check_bad_shape(): a = np.random.random((2, 3, 5)) with pytest.raises(ValueError) as exc: training_utils.check_loss_and_target_compatibility( [a], [losses.categorical_crossentropy], [(2, 3, 6)]) assert 'targets to have the same shape' in str(exc) @pytest.mark.skipif(K.backend() != 'tensorflow', reason='Requires TensorFlow backend') def test_model_with_input_feed_tensor(): """We test building a model with a TF variable as input. We should be able to call fit, evaluate, predict, by only passing them data for the placeholder inputs in the model. 
""" import tensorflow as tf input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) b = Input(shape=(3,), name='input_b') a_2 = Dense(4, name='dense_1')(a) dp = Dropout(0.5, name='dropout') b_2 = dp(b) model = Model([a, b], [a_2, b_2]) model.summary() optimizer = 'rmsprop' loss = 'mse' loss_weights = [1., 0.5] model.compile(optimizer, loss, metrics=['mean_squared_error'], loss_weights=loss_weights, sample_weight_mode=None) # test train_on_batch out = model.train_on_batch(input_b_np, [output_a_np, output_b_np]) out = model.train_on_batch({'input_b': input_b_np}, [output_a_np, output_b_np]) out = model.test_on_batch({'input_b': input_b_np}, [output_a_np, output_b_np]) out = model.predict_on_batch({'input_b': input_b_np}) # test fit out = model.fit({'input_b': input_b_np}, [output_a_np, output_b_np], epochs=1, batch_size=10) out = model.fit(input_b_np, [output_a_np, output_b_np], epochs=1, batch_size=10) # test evaluate out = model.evaluate({'input_b': input_b_np}, [output_a_np, output_b_np], batch_size=10) out = model.evaluate(input_b_np, [output_a_np, output_b_np], batch_size=10) # test predict out = model.predict({'input_b': input_b_np}, batch_size=10) out = model.predict(input_b_np, batch_size=10) assert len(out) == 2 # Now test a model with a single input # i.e. we don't pass any data to fit the model. 
a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) a_2 = Dense(4, name='dense_1')(a) a_2 = Dropout(0.5, name='dropout')(a_2) model = Model(a, a_2) model.summary() optimizer = 'rmsprop' loss = 'mse' model.compile(optimizer, loss, metrics=['mean_squared_error']) # test train_on_batch out = model.train_on_batch(None, output_a_np) out = model.train_on_batch(None, output_a_np) out = model.test_on_batch(None, output_a_np) out = model.predict_on_batch(None) out = model.train_on_batch([], output_a_np) out = model.train_on_batch({}, output_a_np) # test fit out = model.fit(None, output_a_np, epochs=1, batch_size=10) out = model.fit(None, output_a_np, epochs=1, batch_size=10) # test evaluate out = model.evaluate(None, output_a_np, batch_size=10) out = model.evaluate(None, output_a_np, batch_size=10) # test predict out = model.predict(None, steps=3) out = model.predict(None, steps=3) assert out.shape == (10 * 3, 4) # Same, without learning phase # i.e. we don't pass any data to fit the model. 
a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) a_2 = Dense(4, name='dense_1')(a) model = Model(a, a_2) model.summary() optimizer = 'rmsprop' loss = 'mse' model.compile(optimizer, loss, metrics=['mean_squared_error']) # test train_on_batch out = model.train_on_batch(None, output_a_np) out = model.train_on_batch(None, output_a_np) out = model.test_on_batch(None, output_a_np) out = model.predict_on_batch(None) out = model.train_on_batch([], output_a_np) out = model.train_on_batch({}, output_a_np) # test fit out = model.fit(None, output_a_np, epochs=1, batch_size=10) out = model.fit(None, output_a_np, epochs=1, batch_size=10) # test evaluate out = model.evaluate(None, output_a_np, batch_size=10) out = model.evaluate(None, output_a_np, batch_size=10) # test predict out = model.predict(None, steps=3) out = model.predict(None, steps=3) assert out.shape == (10 * 3, 4) def test_model_with_partial_loss(): a = Input(shape=(3,), name='input_a') a_2 = Dense(4, name='dense_1')(a) dp = Dropout(0.5, name='dropout') a_3 = dp(a_2) model = Model(a, [a_2, a_3]) optimizer = 'rmsprop' loss = {'dropout': 'mse'} model.compile(optimizer, loss, metrics=['mae']) input_a_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) # test train_on_batch out = model.train_on_batch(input_a_np, output_a_np) out = model.test_on_batch(input_a_np, output_a_np) # fit out = model.fit(input_a_np, [output_a_np]) # evaluate out = model.evaluate(input_a_np, [output_a_np]) # Same without dropout. 
a = Input(shape=(3,), name='input_a') a_2 = Dense(4, name='dense_1')(a) a_3 = Dense(4, name='dense_2')(a_2) model = Model(a, [a_2, a_3]) optimizer = 'rmsprop' loss = {'dense_2': 'mse'} model.compile(optimizer, loss, metrics={'dense_1': 'mae'}) # test train_on_batch out = model.train_on_batch(input_a_np, output_a_np) out = model.test_on_batch(input_a_np, output_a_np) # fit out = model.fit(input_a_np, [output_a_np]) # evaluate out = model.evaluate(input_a_np, [output_a_np]) @pytest.mark.skipif((K.backend() == 'cntk'), reason='cntk does not support external loss yet') def test_model_with_external_loss(): # None loss, only regularization loss. a = Input(shape=(3,), name='input_a') a_2 = Dense(4, name='dense_1', kernel_regularizer='l1', bias_regularizer='l2')(a) dp = Dropout(0.5, name='dropout') a_3 = dp(a_2) model = Model(a, [a_2, a_3]) optimizer = 'rmsprop' loss = None model.compile(optimizer, loss, metrics=['mae']) input_a_np = np.random.random((10, 3)) # test train_on_batch out = model.train_on_batch(input_a_np, None) out = model.test_on_batch(input_a_np, None) # fit out = model.fit(input_a_np, None) # evaluate out = model.evaluate(input_a_np, None) # No dropout, external loss. a = Input(shape=(3,), name='input_a') a_2 = Dense(4, name='dense_1')(a) a_3 = Dense(4, name='dense_2')(a) model = Model(a, [a_2, a_3]) model.add_loss(K.mean(a_3 + a_2)) optimizer = 'rmsprop' loss = None model.compile(optimizer, loss, metrics=['mae']) # test train_on_batch out = model.train_on_batch(input_a_np, None) out = model.test_on_batch(input_a_np, None) # fit out = model.fit(input_a_np, None) # evaluate out = model.evaluate(input_a_np, None) # Test fit with no external data at all. 
if K.backend() == 'tensorflow': import tensorflow as tf a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) a_2 = Dense(4, name='dense_1')(a) a_2 = Dropout(0.5, name='dropout')(a_2) model = Model(a, a_2) model.add_loss(K.mean(a_2)) model.compile(optimizer='rmsprop', loss=None, metrics=['mean_squared_error']) # test train_on_batch out = model.train_on_batch(None, None) out = model.test_on_batch(None, None) out = model.predict_on_batch(None) # test fit with pytest.raises(ValueError): out = model.fit(None, None, epochs=1, batch_size=10) out = model.fit(None, None, epochs=1, steps_per_epoch=1) # define a generator to produce x=None and y=None @threadsafe_generator def data_tensors_generator(): while True: yield (None, None) generator = data_tensors_generator() # test fit_generator for framework-native data tensors out = model.fit_generator(generator, epochs=1, steps_per_epoch=3) # test evaluate_generator for framework-native data tensors out = model.evaluate_generator(generator, steps=3) # test fit with validation data with pytest.raises(ValueError): out = model.fit(None, None, epochs=1, steps_per_epoch=None, validation_steps=2) out = model.fit(None, None, epochs=1, steps_per_epoch=2, validation_steps=2) # test evaluate with pytest.raises(ValueError): out = model.evaluate(None, None, batch_size=10) out = model.evaluate(None, None, steps=3) # test predict with pytest.raises(ValueError): out = model.predict(None, batch_size=10) out = model.predict(None, steps=3) assert out.shape == (10 * 3, 4) # Test multi-output model without external data. 
a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) a_1 = Dense(4, name='dense_1')(a) a_2 = Dropout(0.5, name='dropout')(a_1) model = Model(a, [a_1, a_2]) model.add_loss(K.mean(a_2)) model.compile(optimizer='rmsprop', loss=None, metrics=['mean_squared_error']) # test train_on_batch out = model.train_on_batch(None, None) out = model.test_on_batch(None, None) out = model.predict_on_batch(None) # test fit with pytest.raises(ValueError): out = model.fit(None, None, epochs=1, batch_size=10) out = model.fit(None, None, epochs=1, steps_per_epoch=1) # test fit with validation data with pytest.raises(ValueError): out = model.fit(None, None, epochs=1, steps_per_epoch=None, validation_steps=2) out = model.fit(None, None, epochs=1, steps_per_epoch=2, validation_steps=2) # test evaluate with pytest.raises(ValueError): out = model.evaluate(None, None, batch_size=10) out = model.evaluate(None, None, steps=3) # test predict with pytest.raises(ValueError): out = model.predict(None, batch_size=10) out = model.predict(None, steps=3) assert len(out) == 2 assert out[0].shape == (10 * 3, 4) assert out[1].shape == (10 * 3, 4) def test_target_tensors(): # single-output, as list model = keras.models.Sequential() model.add(keras.layers.Dense(4, input_shape=(4,), name='dense')) input_val = np.random.random((10, 4)) target_val = np.random.random((10, 4)) target = keras.backend.variable(target_val) model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target]) model.train_on_batch(input_val, None) # single-output, as dict model.compile(optimizer='rmsprop', loss='mse', target_tensors={'dense': target}) model.train_on_batch(input_val, None) # single-output, as tensor model.compile(optimizer='rmsprop', loss='mse', target_tensors=target) model.train_on_batch(input_val, None) # test invalid arguments with pytest.raises(TypeError): model.compile(optimizer='rmsprop', loss='mse', target_tensors=set()) with pytest.raises(ValueError): model.compile(optimizer='rmsprop', loss='mse', 
target_tensors=[target, target]) with pytest.raises(ValueError): model.compile(optimizer='rmsprop', loss='mse', target_tensors={'dense2': None}) with pytest.raises(ValueError): model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target]) model.train_on_batch(input_val, target_val) # multi-output, as list input_val = np.random.random((10, 4)) target_val_a = np.random.random((10, 4)) target_val_b = np.random.random((10, 4)) target_a = keras.backend.variable(target_val_a) target_b = keras.backend.variable(target_val_b) inputs = keras.layers.Input(shape=(4,)) output_a = keras.layers.Dense(4, name='dense_a')(inputs) output_b = keras.layers.Dense(4, name='dense_b')(inputs) model = keras.models.Model(inputs, [output_a, output_b]) model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target_a, target_b]) model.train_on_batch(input_val, None) # multi-output, as dict model.compile(optimizer='rmsprop', loss='mse', target_tensors={'dense_a': target_a, 'dense_b': target_b}) model.train_on_batch(input_val, None) # multi-output, not enough target tensors when `target_tensors` is not a dict with pytest.raises(ValueError, match='When passing a list as `target_tensors`, it should ' 'have one entry per model output. The model has \d ' 'outputs, but you passed target_tensors='): model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target_a]) with pytest.raises(ValueError, match='The model has \d outputs, but you passed a single ' 'tensor as `target_tensors`. 
Expected a list or ' 'a dict of tensors.'): model.compile(optimizer='rmsprop', loss='mse', target_tensors=target_a) # test with sample weights model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target_a, target_b]) model.train_on_batch(input_val, None, sample_weight={'dense_a': np.random.random((10,))}) def test_model_custom_target_tensors(): a = Input(shape=(3,), name='input_a') b = Input(shape=(3,), name='input_b') a_2 = Dense(4, name='dense_1')(a) dp = Dropout(0.5, name='dropout') b_2 = dp(b) y = K.placeholder([10, 4], name='y') y1 = K.placeholder([10, 3], name='y1') y2 = K.placeholder([7, 5], name='y2') model = Model([a, b], [a_2, b_2]) optimizer = 'rmsprop' loss = 'mse' loss_weights = [1., 0.5] # test list of target tensors with pytest.raises(ValueError): model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None, target_tensors=[y, y1, y2]) model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None, target_tensors=[y, y1]) input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], {y: np.random.random((10, 4)), y1: np.random.random((10, 3))}) # test dictionary of target_tensors with pytest.raises(ValueError): model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None, target_tensors={'does_not_exist': y2}) # test dictionary of target_tensors model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None, target_tensors={'dense_1': y, 'dropout': y1}) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], {y: np.random.random((10, 4)), y1: np.random.random((10, 3))}) if K.backend() == 'tensorflow': import tensorflow as tf # test with custom TF placeholder as target pl_target_a = tf.placeholder('float32', 
shape=(None, 4)) model.compile(optimizer='rmsprop', loss='mse', target_tensors={'dense_1': pl_target_a}) model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) @pytest.mark.skipif(sys.version_info < (3,), reason='Cannot catch warnings in python 2') def test_trainable_weights_count_consistency(): """Tests the trainable weights consistency check of Model. This verifies that a warning is shown if model.trainable is modified and the model is summarized/run without a new call to .compile() Reproduce issue #8121 """ a = Input(shape=(3,), name='input_a') model1 = Model(inputs=a, outputs=Dense(1)(a)) model1.trainable = False b = Input(shape=(3,), name='input_b') y = model1(b) model2 = Model(inputs=b, outputs=Dense(1)(y)) model2.compile(optimizer='adam', loss='mse') model1.trainable = True # Should warn on .summary() with pytest.warns(UserWarning) as w: model2.summary() warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w]) assert warning_raised, ( 'No warning raised when trainable is modified without .compile.') # And on .fit() with pytest.warns(UserWarning) as w: model2.fit(x=np.zeros((5, 3)), y=np.zeros((5, 1))) warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w]) assert warning_raised, ( 'No warning raised when trainable is modified without .compile.') # And shouldn't warn if we recompile model2.compile(optimizer='adam', loss='mse') with pytest.warns(None) as w: model2.summary() assert len(w) == 0, ( 'Warning raised even when .compile() is called after modifying .trainable') def test_pandas_dataframe(): input_a = Input(shape=(3,), name='input_a') input_b = Input(shape=(3,), name='input_b') x = Dense(4, name='dense_1')(input_a) y = Dense(3, name='desne_2')(input_b) model_1 = Model(inputs=input_a, outputs=x) model_2 = Model(inputs=[input_a, input_b], outputs=[x, y]) optimizer = 'rmsprop' loss = 'mse' model_1.compile(optimizer=optimizer, loss=loss) model_2.compile(optimizer=optimizer, loss=loss) input_a_df = 
pd.DataFrame(np.random.random((10, 3))) input_b_df = pd.DataFrame(np.random.random((10, 3))) output_a_df = pd.DataFrame(np.random.random((10, 4))) output_b_df = pd.DataFrame(np.random.random((10, 3))) model_1.fit(input_a_df, output_a_df) model_2.fit([input_a_df, input_b_df], [output_a_df, output_b_df]) model_1.fit([input_a_df], [output_a_df]) model_1.fit({'input_a': input_a_df}, output_a_df) model_2.fit({'input_a': input_a_df, 'input_b': input_b_df}, [output_a_df, output_b_df]) model_1.predict(input_a_df) model_2.predict([input_a_df, input_b_df]) model_1.predict([input_a_df]) model_1.predict({'input_a': input_a_df}) model_2.predict({'input_a': input_a_df, 'input_b': input_b_df}) model_1.predict_on_batch(input_a_df) model_2.predict_on_batch([input_a_df, input_b_df]) model_1.predict_on_batch([input_a_df]) model_1.predict_on_batch({'input_a': input_a_df}) model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df}) model_1.evaluate(input_a_df, output_a_df) model_2.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df]) model_1.evaluate([input_a_df], [output_a_df]) model_1.evaluate({'input_a': input_a_df}, output_a_df) model_2.evaluate({'input_a': input_a_df, 'input_b': input_b_df}, [output_a_df, output_b_df]) model_1.train_on_batch(input_a_df, output_a_df) model_2.train_on_batch([input_a_df, input_b_df], [output_a_df, output_b_df]) model_1.train_on_batch([input_a_df], [output_a_df]) model_1.train_on_batch({'input_a': input_a_df}, output_a_df) model_2.train_on_batch({'input_a': input_a_df, 'input_b': input_b_df}, [output_a_df, output_b_df]) model_1.test_on_batch(input_a_df, output_a_df) model_2.test_on_batch([input_a_df, input_b_df], [output_a_df, output_b_df]) model_1.test_on_batch([input_a_df], [output_a_df]) model_1.test_on_batch({'input_a': input_a_df}, output_a_df) model_2.test_on_batch({'input_a': input_a_df, 'input_b': input_b_df}, [output_a_df, output_b_df]) @pytest.mark.skipif(K.backend() != 'tensorflow', reason='Requires TensorFlow') 
@pytest.mark.skipif((K.backend() == 'tensorflow' and not hasattr(K.get_session(), '_make_callable_from_options')), reason='Requires TF 1.8 or higher') def test_training_and_eval_methods_on_symbolic_tensors_single_io(): x = keras.layers.Input(shape=(3,), name='input') y = keras.layers.Dense(4, name='dense')(x) model = keras.Model(x, y) optimizer = 'rmsprop' loss = 'mse' metrics = ['mae'] model.compile(optimizer, loss, metrics=metrics) inputs = keras.backend.zeros(shape=(10, 3)) targets = keras.backend.zeros(shape=(10, 4)) model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=0) model.evaluate(inputs, targets, steps=2, verbose=0) model.predict(inputs, steps=2) model.train_on_batch(inputs, targets) model.test_on_batch(inputs, targets) model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=1, validation_data=(inputs, targets), validation_steps=2) @pytest.mark.skipif(K.backend() != 'tensorflow', reason='Requires TensorFlow') @pytest.mark.skipif((K.backend() == 'tensorflow' and not hasattr(K.get_session(), '_make_callable_from_options')), reason='Requires TF 1.8 or higher') def test_training_and_eval_methods_on_symbolic_tensors_multi_io(): a = keras.layers.Input(shape=(3,), name='input_a') b = keras.layers.Input(shape=(3,), name='input_b') dense = keras.layers.Dense(4, name='dense') c = dense(a) d = dense(b) e = keras.layers.Dropout(0.5, name='dropout')(c) model = keras.models.Model([a, b], [d, e]) optimizer = 'rmsprop' loss = 'mse' loss_weights = [1., 0.5] metrics = ['mae'] model.compile(optimizer, loss, metrics=metrics, loss_weights=loss_weights) input_a_tf = keras.backend.zeros(shape=(10, 3)) input_b_tf = keras.backend.zeros(shape=(10, 3)) output_d_tf = keras.backend.zeros(shape=(10, 4)) output_e_tf = keras.backend.zeros(shape=(10, 4)) model.fit( [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], epochs=1, steps_per_epoch=2, verbose=0) with pytest.raises(ValueError) as excinfo: model.fit( [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], 
epochs=1, batch_size=5, verbose=0) assert 'should specify the `steps_per_epoch`' in str(excinfo.value) model.train_on_batch([input_a_tf, input_b_tf], [output_d_tf, output_e_tf]) # Test with dictionary inputs model.fit( {'input_a': input_a_tf, 'input_b': input_b_tf}, {'dense': output_d_tf, 'dropout': output_e_tf}, epochs=1, steps_per_epoch=2, verbose=0) model.fit( {'input_a': input_a_tf, 'input_b': input_b_tf}, {'dense': output_d_tf, 'dropout': output_e_tf}, validation_data=({'input_a': input_a_tf, 'input_b': input_b_tf}, {'dense': output_d_tf, 'dropout': output_e_tf}), epochs=1, steps_per_epoch=2, validation_steps=2, verbose=0) model.train_on_batch( {'input_a': input_a_tf, 'input_b': input_b_tf}, {'dense': output_d_tf, 'dropout': output_e_tf}) # Test with validation data model.fit( [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], validation_data=([input_a_tf, input_b_tf], [output_d_tf, output_e_tf]), epochs=1, steps_per_epoch=2, validation_steps=2, verbose=0) # Test with validation split with pytest.raises(ValueError) as excinfo: model.fit( [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], epochs=2, steps_per_epoch=2, verbose=0, validation_split=0.2, validation_steps=2) assert 'you cannot use `validation_split`' in str(excinfo.value) # Test evaluation / prediction methods model.evaluate([input_a_tf, input_b_tf], [output_d_tf, output_e_tf], steps=2, verbose=0) model.predict([input_a_tf, input_b_tf], steps=2) model.test_on_batch([input_a_tf, input_b_tf], [output_d_tf, output_e_tf]) def test_model_with_crossentropy_losses_channels_first(): """Tests use of all crossentropy losses with `channels_first`. Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`, and `binary_crossentropy`. Verifies that evaluate gives the same result with either `channels_first` or `channels_last` image_data_format. Tests PR #9715. 
""" def prepare_simple_model(input_tensor, loss_name, target): axis = 1 if K.image_data_format() == 'channels_first' else -1 if loss_name == 'sparse_categorical_crossentropy': loss = lambda y_true, y_pred: K.sparse_categorical_crossentropy( y_true, y_pred, axis=axis) num_channels = np.amax(target) + 1 activation = 'softmax' elif loss_name == 'categorical_crossentropy': loss = lambda y_true, y_pred: K.categorical_crossentropy( y_true, y_pred, axis=axis) num_channels = target.shape[axis] activation = 'softmax' elif loss_name == 'binary_crossentropy': loss = lambda y_true, y_pred: K.binary_crossentropy(y_true, y_pred) num_channels = target.shape[axis] activation = 'sigmoid' predictions = Conv2D(num_channels, 1, activation=activation, kernel_initializer='ones', bias_initializer='ones')(input_tensor) simple_model = Model(inputs=input_tensor, outputs=predictions) simple_model.compile(optimizer='rmsprop', loss=loss) return simple_model losses_to_test = ['sparse_categorical_crossentropy', 'categorical_crossentropy', 'binary_crossentropy'] data_channels_first = np.array([[[[8., 7.1, 0.], [4.5, 2.6, 0.55], [0.9, 4.2, 11.2]]]], dtype=np.float32) # Labels for testing 4-class sparse_categorical_crossentropy, 4-class # categorical_crossentropy, and 2-class binary_crossentropy: labels_channels_first = [np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]]), np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 0]], [[1, 0, 0], [0, 0, 1], [0, 1, 0]], [[0, 0, 0], [1, 0, 0], [0, 0, 1]], [[0, 0, 1], [0, 0, 0], [1, 0, 0]]]]), np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 1]], [[1, 0, 1], [1, 0, 1], [1, 1, 0]]]])] # Compute one loss for each loss function in the list `losses_to_test`: loss_channels_last = [0., 0., 0.] loss_channels_first = [0., 0., 0.] 
old_data_format = K.image_data_format() # Evaluate a simple network with channels last, with all three loss # functions: K.set_image_data_format('channels_last') data = np.moveaxis(data_channels_first, 1, -1) for index, loss_function in enumerate(losses_to_test): labels = np.moveaxis(labels_channels_first[index], 1, -1) inputs = Input(shape=(3, 3, 1)) model = prepare_simple_model(inputs, loss_function, labels) loss_channels_last[index] = model.evaluate(x=data, y=labels, batch_size=1, verbose=0) # Evaluate the same network with channels first, with all three loss # functions: K.set_image_data_format('channels_first') data = data_channels_first for index, loss_function in enumerate(losses_to_test): labels = labels_channels_first[index] inputs = Input(shape=(1, 3, 3)) model = prepare_simple_model(inputs, loss_function, labels) loss_channels_first[index] = model.evaluate(x=data, y=labels, batch_size=1, verbose=0) K.set_image_data_format(old_data_format) assert_allclose(loss_channels_first, loss_channels_last, err_msg='{}{}'.format('Computed different losses for ', 'channels_first and channels_last.')) def test_dynamic_set_inputs(): model = Sequential() model.add(Dense(16, input_dim=32)) model.add(Activation('relu')) model2 = Sequential() model2.add(model.layers[-1]) model2.add(Dense(8)) preds2 = model2.predict([np.random.random((1, 32))]) assert preds2.shape == (1, 8) model3 = Model(inputs=model.inputs, outputs=model.outputs) with pytest.raises(ValueError): model3._set_inputs(model.inputs) model3.inputs = None model3._set_inputs(model.inputs) preds3 = model3.predict([np.random.random((1, 32))]) assert preds3.shape == (1, 16) model3.inputs = None model3._set_inputs(model.input) preds3 = model3.predict(np.random.random((1, 32))) assert preds3.shape == (1, 16) aux_input = Input(shape=(5,), name='aux_input') aux_model = Dense(3)(aux_input) model4 = Model(inputs=model.inputs + [aux_input], outputs=Concatenate()(model.outputs + [aux_model])) model4.inputs = None 
model4._set_inputs(model.inputs + [aux_input]) preds4 = model4.predict([np.random.random((1, 32)), np.random.random((1, 5))]) assert preds4.shape == (1, 19) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/keras/engine/test_topology.py0000644000000000116100000006703413354530144021336 0ustar rooteng00000000000000import pytest import json import numpy as np from keras.layers import Dense, Dropout, Conv2D, InputLayer from keras import layers from keras.engine import Input, Layer, saving, get_source_inputs from keras.models import Model, Sequential from keras import backend as K from keras.models import model_from_json, model_from_yaml from keras.initializers import Constant skipif_no_tf_gpu = pytest.mark.skipif( (K.backend() != 'tensorflow' or not K.tensorflow_backend._get_available_gpus()), reason='Requires TensorFlow backend and a GPU') def test_get_updates_for(): a = Input(shape=(2,)) dense_layer = Dense(1) dense_layer.add_update(0, inputs=a) dense_layer.add_update(1, inputs=None) assert dense_layer.get_updates_for(a) == [0] assert dense_layer.get_updates_for(None) == [1] def test_get_losses_for(): a = Input(shape=(2,)) dense_layer = Dense(1) dense_layer.add_loss(0, inputs=a) dense_layer.add_loss(1, inputs=None) assert dense_layer.get_losses_for(a) == [0] assert dense_layer.get_losses_for(None) == [1] def test_trainable_weights(): a = Input(shape=(2,)) b = Dense(1)(a) model = Model(a, b) weights = model.weights assert model.trainable_weights == weights assert model.non_trainable_weights == [] model.trainable = False assert model.trainable_weights == [] assert model.non_trainable_weights == weights model.trainable = True assert model.trainable_weights == weights assert model.non_trainable_weights == [] model.layers[1].trainable = False assert model.trainable_weights == [] assert model.non_trainable_weights == weights # sequential model model = Sequential() model.add(Dense(1, input_dim=2)) weights = model.weights assert model.trainable_weights == 
weights assert model.non_trainable_weights == [] model.trainable = False assert model.trainable_weights == [] assert model.non_trainable_weights == weights model.trainable = True assert model.trainable_weights == weights assert model.non_trainable_weights == [] model.layers[0].trainable = False assert model.trainable_weights == [] assert model.non_trainable_weights == weights def test_valid_compute_mask(): model = Sequential() model.add(Dense(1, input_dim=2)) assert model.layers[0].supports_masking is True assert model.layers[0].compute_mask([model.input], [0., 1.]) == [0., 1.] def test_invalid_compute_mask(): model = Sequential() model.add(Conv2D(1, [2, 2], input_shape=[3, 3, 1])) assert model.layers[0].supports_masking is False assert model.layers[0].compute_mask([model.input], [None]) is None mask = np.array([[0., 1.], [1., 0.]]) with pytest.raises(TypeError): model.layers[0].compute_mask([model.input], [mask]) with pytest.raises(TypeError): model.layers[0].compute_mask([model.input], mask) def test_get_layer(): model = Sequential() model.add(Dense(1, input_dim=2)) with pytest.raises(ValueError): model.get_layer(index=5) with pytest.raises(ValueError): model.get_layer(index=None) with pytest.raises(ValueError): model.get_layer(name='conv') def test_learning_phase(): a = Input(shape=(32,), name='input_a') b = Input(shape=(32,), name='input_b') a_2 = Dense(16, name='dense_1')(a) dp = Dropout(0.5, name='dropout') b_2 = dp(b) assert not a_2._uses_learning_phase assert b_2._uses_learning_phase # test merge m = layers.concatenate([a_2, b_2]) assert m._uses_learning_phase # Test recursion model = Model([a, b], [a_2, b_2]) print(model.input_spec) assert model.uses_learning_phase c = Input(shape=(32,), name='input_c') d = Input(shape=(32,), name='input_d') c_2, b_2 = model([c, d]) assert c_2._uses_learning_phase assert b_2._uses_learning_phase # try actually running graph fn = K.function(model.inputs + [K.learning_phase()], model.outputs) input_a_np = 
np.random.random((10, 32)) input_b_np = np.random.random((10, 32)) fn_outputs_no_dp = fn([input_a_np, input_b_np, 0]) fn_outputs_dp = fn([input_a_np, input_b_np, 1]) # output a: nothing changes assert fn_outputs_no_dp[0].sum() == fn_outputs_dp[0].sum() # output b: dropout applied assert fn_outputs_no_dp[1].sum() != fn_outputs_dp[1].sum() def test_layer_call_arguments(): # Test the ability to pass and serialize arguments to `call`. inp = layers.Input(shape=(2,)) x = layers.Dense(3)(inp) x = layers.Dropout(0.5)(x, training=True) model = Model(inp, x) assert not model.uses_learning_phase # Test that argument is kept when applying the model inp2 = layers.Input(shape=(2,)) out2 = model(inp2) assert not out2._uses_learning_phase # Test that argument is kept after loading a model config = model.get_config() model = Model.from_config(config) assert not model.uses_learning_phase def test_node_construction(): #################################################### # test basics a = Input(shape=(32,), name='input_a') b = Input(shape=(32,), name='input_b') assert a._keras_shape == (None, 32) a_layer, a_node_index, a_tensor_index = a._keras_history b_layer, b_node_index, b_tensor_index = b._keras_history assert len(a_layer._inbound_nodes) == 1 assert a_tensor_index is 0 node = a_layer._inbound_nodes[a_node_index] assert node.outbound_layer == a_layer assert isinstance(node.inbound_layers, list) assert node.inbound_layers == [] assert isinstance(node.input_tensors, list) assert node.input_tensors == [a] assert isinstance(node.input_masks, list) assert node.input_masks == [None] assert isinstance(node.input_shapes, list) assert node.input_shapes == [(None, 32)] assert isinstance(node.output_tensors, list) assert node.output_tensors == [a] assert isinstance(node.output_shapes, list) assert node.output_shapes == [(None, 32)] assert isinstance(node.output_masks, list) assert node.output_masks == [None] dense = Dense(16, name='dense_1') a_2 = dense(a) b_2 = dense(b) assert 
len(dense._inbound_nodes) == 2 assert len(dense._outbound_nodes) == 0 assert dense._inbound_nodes[0].inbound_layers == [a_layer] assert dense._inbound_nodes[0].outbound_layer == dense assert dense._inbound_nodes[1].inbound_layers == [b_layer] assert dense._inbound_nodes[1].outbound_layer == dense assert dense._inbound_nodes[0].input_tensors == [a] assert dense._inbound_nodes[1].input_tensors == [b] assert dense._inbound_nodes[0].get_config()['inbound_layers'] == ['input_a'] assert dense._inbound_nodes[1].get_config()['inbound_layers'] == ['input_b'] # test layer properties test_layer = Dense(16, name='test_layer') a_test = test_layer(a) assert K.int_shape(test_layer.kernel) == (32, 16) assert test_layer.input == a assert test_layer.output == a_test assert test_layer.input_mask is None assert test_layer.output_mask is None assert test_layer.input_shape == (None, 32) assert test_layer.output_shape == (None, 16) with pytest.raises(AttributeError): dense.input with pytest.raises(AttributeError): dense.output with pytest.raises(AttributeError): dense.input_mask with pytest.raises(AttributeError): dense.output_mask assert dense.get_input_at(0) == a assert dense.get_input_at(1) == b assert dense.get_output_at(0) == a_2 assert dense.get_output_at(1) == b_2 assert dense.get_input_shape_at(0) == (None, 32) assert dense.get_input_shape_at(1) == (None, 32) assert dense.get_output_shape_at(0) == (None, 16) assert dense.get_output_shape_at(1) == (None, 16) assert dense.get_input_mask_at(0) is None assert dense.get_input_mask_at(1) is None assert dense.get_output_mask_at(0) is None assert dense.get_output_mask_at(1) is None def test_multi_input_layer(): #################################################### # test multi-input layer a = Input(shape=(32,), name='input_a') b = Input(shape=(32,), name='input_b') dense = Dense(16, name='dense_1') a_2 = dense(a) b_2 = dense(b) merged = layers.concatenate([a_2, b_2], name='merge') assert merged._keras_shape == (None, 16 * 2) merge_layer, 
merge_node_index, merge_tensor_index = merged._keras_history assert merge_node_index == 0 assert merge_tensor_index == 0 assert len(merge_layer._inbound_nodes) == 1 assert len(merge_layer._outbound_nodes) == 0 assert len(merge_layer._inbound_nodes[0].input_tensors) == 2 assert len(merge_layer._inbound_nodes[0].inbound_layers) == 2 c = Dense(64, name='dense_2')(merged) d = Dense(5, name='dense_3')(c) model = Model(inputs=[a, b], outputs=[c, d], name='model') assert len(model.layers) == 6 expected_shapes = [(None, 64), (None, 5)] assert model.compute_output_shape([(None, 32), (None, 32)]) == expected_shapes assert model.compute_mask([a, b], [None, None]) == [None, None] assert model.compute_output_shape([(None, 32), (None, 32)]) == expected_shapes # we don't check names of first 2 layers (inputs) because # ordering of same-level layers is not fixed expected_names = ['dense_1', 'merge', 'dense_2', 'dense_3'] assert [l.name for l in model.layers][2:] == expected_names assert [l.name for l in model._input_layers] == ['input_a', 'input_b'] assert [l.name for l in model._output_layers] == ['dense_2', 'dense_3'] # actually run model fn = K.function(model.inputs, model.outputs) input_a_np = np.random.random((10, 32)) input_b_np = np.random.random((10, 32)) fn_outputs = fn([input_a_np, input_b_np]) assert [x.shape for x in fn_outputs] == [(10, 64), (10, 5)] # test get_source_inputs assert get_source_inputs(c) == [a, b] # serialization / deserialization json_config = model.to_json() recreated_model = model_from_json(json_config) recreated_model.compile('rmsprop', 'mse') assert [l.name for l in recreated_model.layers][2:] == expected_names assert [l.name for l in recreated_model._input_layers] == ['input_a', 'input_b'] assert [l.name for l in recreated_model._output_layers] == ['dense_2', 'dense_3'] fn = K.function(recreated_model.inputs, recreated_model.outputs) input_a_np = np.random.random((10, 32)) input_b_np = np.random.random((10, 32)) fn_outputs = fn([input_a_np, 
input_b_np]) assert [x.shape for x in fn_outputs] == [(10, 64), (10, 5)] def test_recursion(): #################################################### # test recursion a = Input(shape=(32,), name='input_a') b = Input(shape=(32,), name='input_b') dense = Dense(16, name='dense_1') a_2 = dense(a) b_2 = dense(b) merged = layers.concatenate([a_2, b_2], name='merge') c = Dense(64, name='dense_2')(merged) d = Dense(5, name='dense_3')(c) model = Model(inputs=[a, b], outputs=[c, d], name='model') e = Input(shape=(32,), name='input_e') f = Input(shape=(32,), name='input_f') g, h = model([e, f]) # g2, h2 = model([e, f]) assert g._keras_shape == c._keras_shape assert h._keras_shape == d._keras_shape # test separate manipulation of different layer outputs i = Dense(7, name='dense_4')(h) final_model = Model(inputs=[e, f], outputs=[i, g], name='final') assert len(final_model.inputs) == 2 assert len(final_model.outputs) == 2 assert len(final_model.layers) == 4 # we don't check names of first 2 layers (inputs) because # ordering of same-level layers is not fixed expected_shapes = [(10, 7), (10, 64)] assert [layer.name for layer in final_model.layers][2:] == ['model', 'dense_4'] assert model.compute_mask([e, f], [None, None]) == [None, None] assert final_model.compute_output_shape([(10, 32), (10, 32)]) == expected_shapes # run recursive model fn = K.function(final_model.inputs, final_model.outputs) input_a_np = np.random.random((10, 32)) input_b_np = np.random.random((10, 32)) fn_outputs = fn([input_a_np, input_b_np]) assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)] # test serialization model_config = final_model.get_config() print(json.dumps(model_config, indent=4)) recreated_model = Model.from_config(model_config) fn = K.function(recreated_model.inputs, recreated_model.outputs) input_a_np = np.random.random((10, 32)) input_b_np = np.random.random((10, 32)) fn_outputs = fn([input_a_np, input_b_np]) assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)] 
#################################################### # test multi-input multi-output j = Input(shape=(32,), name='input_j') k = Input(shape=(32,), name='input_k') m, n = model([j, k]) o = Input(shape=(32,), name='input_o') p = Input(shape=(32,), name='input_p') q, r = model([o, p]) assert n._keras_shape == (None, 5) assert q._keras_shape == (None, 64) s = layers.concatenate([n, q], name='merge_nq') assert s._keras_shape == (None, 64 + 5) # test with single output as 1-elem list multi_io_model = Model([j, k, o, p], [s]) fn = K.function(multi_io_model.inputs, multi_io_model.outputs) fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32))]) assert [x.shape for x in fn_outputs] == [(10, 69)] # test with single output as tensor multi_io_model = Model([j, k, o, p], s) fn = K.function(multi_io_model.inputs, multi_io_model.outputs) fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32))]) # note that the output of the K.function will still be a 1-elem list assert [x.shape for x in fn_outputs] == [(10, 69)] # test serialization model_config = multi_io_model.get_config() recreated_model = Model.from_config(model_config) fn = K.function(recreated_model.inputs, recreated_model.outputs) fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32))]) # note that the output of the K.function will still be a 1-elem list assert [x.shape for x in fn_outputs] == [(10, 69)] config = model.get_config() Model.from_config(config) model.summary() json_str = model.to_json() model_from_json(json_str) yaml_str = model.to_yaml() model_from_yaml(yaml_str) #################################################### # test invalid graphs # input is not an Input tensor j = Input(shape=(32,), name='input_j') j = Dense(32)(j) k = Input(shape=(32,), name='input_k') m, n = model([j, k]) with 
pytest.raises(ValueError): Model([j, k], [m, n]) # disconnected graph j = Input(shape=(32,), name='input_j') k = Input(shape=(32,), name='input_k') m, n = model([j, k]) with pytest.raises(ValueError): Model([j], [m, n]) # redundant outputs j = Input(shape=(32,), name='input_j') k = Input(shape=(32,), name='input_k') m, n = model([j, k]) # this should work with a warning Model([j, k], [m, n, n]) # redundant inputs j = Input(shape=(32,), name='input_j') k = Input(shape=(32,), name='input_k') m, n = model([j, k]) with pytest.raises(ValueError): Model([j, k, j], [m, n]) # i have not idea what I'm doing: garbage as inputs/outputs j = Input(shape=(32,), name='input_j') k = Input(shape=(32,), name='input_k') m, n = model([j, k]) with pytest.raises(ValueError): Model([j, k], [m, n, 0]) #################################################### # test calling layers/models on TF tensors if K._BACKEND == 'tensorflow': import tensorflow as tf j = Input(shape=(32,), name='input_j') k = Input(shape=(32,), name='input_k') m, n = model([j, k]) tf_model = Model([j, k], [m, n]) j_tf = tf.placeholder(dtype=K.floatx()) k_tf = tf.placeholder(dtype=K.floatx()) m_tf, n_tf = tf_model([j_tf, k_tf]) assert m_tf.get_shape().as_list() == [None, 64] assert n_tf.get_shape().as_list() == [None, 5] # test merge layers.concatenate([j_tf, k_tf], axis=1) layers.add([j_tf, k_tf]) # test tensor input x = tf.placeholder(shape=(None, 2), dtype=K.floatx()) InputLayer(input_tensor=x) x = Input(tensor=x) Dense(2)(x) def test_load_layers(): from keras.layers import ConvLSTM2D, TimeDistributed from keras.layers import Bidirectional, Conv2D, Input from keras.models import Model if K.backend() == 'tensorflow' or K.backend() == 'cntk': inputs = Input(shape=(10, 20, 20, 1)) else: inputs = Input(shape=(10, 1, 20, 20)) td_conv = TimeDistributed(Conv2D(15, (5, 5)))(inputs) bi_conv = Bidirectional(ConvLSTM2D(10, (3, 3)), merge_mode='concat')(td_conv) model = Model(inputs=inputs, outputs=bi_conv) weight_value_tuples = [] 
# TimeDistributed Conv2D layer # use 'channels_first' data format to check that # the function is being called correctly for Conv2D # old: (filters, stack_size, kernel_rows, kernel_cols) # new: (kernel_rows, kernel_cols, stack_size, filters) weight_tensor_td_conv_old = list() weight_tensor_td_conv_old.append(np.zeros((15, 1, 5, 5))) weight_tensor_td_conv_old.append(np.zeros((15,))) td_conv_layer = model.layers[1] td_conv_layer.layer.data_format = 'channels_first' weight_tensor_td_conv_new = saving.preprocess_weights_for_loading( td_conv_layer, weight_tensor_td_conv_old, original_keras_version='1') symbolic_weights = td_conv_layer.weights assert (len(symbolic_weights) == len(weight_tensor_td_conv_new)) weight_value_tuples += zip(symbolic_weights, weight_tensor_td_conv_new) # Bidirectional ConvLSTM2D layer # old ConvLSTM2D took a list of 12 weight tensors, # returns a list of 3 concatenated larger tensors. weights_bi_conv_old = [] for j in range(2): # bidirectional for i in range(4): weights_bi_conv_old.append(np.zeros((3, 3, 15, 10))) # kernel weights_bi_conv_old.append(np.zeros((3, 3, 10, 10))) # recurrent kernel weights_bi_conv_old.append(np.zeros((10,))) # bias bi_convlstm_layer = model.layers[2] weights_bi_conv_new = saving.preprocess_weights_for_loading( bi_convlstm_layer, weights_bi_conv_old, original_keras_version='1') symbolic_weights = bi_convlstm_layer.weights assert (len(symbolic_weights) == len(weights_bi_conv_new)) weight_value_tuples += zip(symbolic_weights, weights_bi_conv_new) K.batch_set_value(weight_value_tuples) assert np.all(K.eval(model.layers[1].weights[0]) == weight_tensor_td_conv_new[0]) assert np.all(K.eval(model.layers[1].weights[1]) == weight_tensor_td_conv_new[1]) assert np.all(K.eval(model.layers[2].weights[0]) == weights_bi_conv_new[0]) assert np.all(K.eval(model.layers[2].weights[1]) == weights_bi_conv_new[1]) assert np.all(K.eval(model.layers[2].weights[2]) == weights_bi_conv_new[2]) assert np.all(K.eval(model.layers[2].weights[3]) == 
weights_bi_conv_new[3]) assert np.all(K.eval(model.layers[2].weights[4]) == weights_bi_conv_new[4]) assert np.all(K.eval(model.layers[2].weights[5]) == weights_bi_conv_new[5]) def convert_weights(layer, weights): if layer.__class__.__name__ == 'GRU': W = [np.split(w, 3, axis=-1) for w in weights] return sum(map(list, zip(*W)), []) elif layer.__class__.__name__ in ('LSTM', 'ConvLSTM2D'): W = [np.split(w, 4, axis=-1) for w in weights] for w in W: w[2], w[1] = w[1], w[2] return sum(map(list, zip(*W)), []) elif layer.__class__.__name__ == 'Conv2DTranspose': return [np.transpose(weights[0], (2, 3, 0, 1)), weights[1]] return weights @pytest.mark.parametrize("layer", [ layers.GRU(2, input_shape=[3, 5]), layers.LSTM(2, input_shape=[3, 5]), layers.ConvLSTM2D(5, (3, 3), input_shape=[6, 6, 6, 6], data_format='channels_first'), ], ids=['GRU', 'LSTM', 'ConvLSTM2D']) def test_preprocess_weights_for_loading(layer): # A model is needed to initialize weights. _ = Sequential([layer]) weights1 = layer.get_weights() weights2 = saving.preprocess_weights_for_loading( layer, convert_weights(layer, weights1), original_keras_version='1') assert all([np.allclose(x, y, 1e-5) for (x, y) in zip(weights1, weights2)]) @pytest.mark.parametrize("layer", [ layers.Conv2D(2, (3, 3), input_shape=[5, 5, 3]), layers.Conv2DTranspose(2, (5, 5), input_shape=[7, 7, 3], data_format='channels_first'), ], ids=['Conv2D', 'Conv2DTranspose']) def test_preprocess_weights_for_loading_for_model(layer): model = Sequential([layer]) weights1 = model.get_weights() weights2 = saving.preprocess_weights_for_loading( model, convert_weights(layer, weights1), original_keras_version='1') assert all([np.allclose(x, y, 1e-5) for (x, y) in zip(weights1, weights2)]) @pytest.mark.parametrize('layer_class,args', [ (layers.GRU, {'units': 2, 'input_shape': [3, 5]}), (layers.GRU, {'units': 2, 'input_shape': [3, 5], 'reset_after': True}), (layers.LSTM, {'units': 2, 'input_shape': [3, 5]}), ]) def 
test_preprocess_weights_for_loading_rnn_should_be_idempotent(layer_class, args): """ Loading weights from a RNN class to itself should not convert the weights. """ # layer can be instantiated only for supported backends layer = layer_class(**args) # A model is needed to initialize weights. _ = Sequential([layer]) weights1 = layer.get_weights() weights2 = saving.preprocess_weights_for_loading(layer, weights1) assert all([np.allclose(x, y, 1e-5) for (x, y) in zip(weights1, weights2)]) @pytest.mark.parametrize('layer_class,args', [ (layers.CuDNNGRU, {'units': 2, 'input_shape': [3, 5]}), (layers.CuDNNLSTM, {'units': 2, 'input_shape': [3, 5]}), ]) @skipif_no_tf_gpu def test_preprocess_weights_for_loading_cudnn_rnn_should_be_idempotent(layer_class, args): test_preprocess_weights_for_loading_rnn_should_be_idempotent(layer_class, args) def test_recursion_with_bn_and_loss(): model1 = Sequential([ layers.Dense(5, input_dim=5, activity_regularizer='l1'), layers.BatchNormalization(), layers.Dense(5), ]) print('NEW MODEL') inputs = layers.Input(shape=(5,)) outputs = model1(inputs) model2 = Model(inputs=inputs, outputs=outputs) assert len(model1.updates) == 2 assert len(model2.updates) == 2 assert len(model1.losses) == 1 assert len(model2.losses) == 1, model2.layers[1]._per_input_losses model1.compile(optimizer='sgd', loss='categorical_crossentropy') model2.compile(optimizer='sgd', loss='categorical_crossentropy') x = np.ones((3, 5)) y = np.ones((3, 5)) model1.fit(x, y, verbose=0, epochs=1) model2.fit(x, y, verbose=0, epochs=1) def test_activity_regularization_with_model_composition(): def reg(x): return K.sum(x) net_a_input = Input((2,)) net_a = net_a_input net_a = Dense(2, kernel_initializer='ones', use_bias=False, activity_regularizer=reg)(net_a) model_a = Model([net_a_input], [net_a]) net_b_input = Input((2,)) net_b = model_a(net_b_input) model_b = Model([net_b_input], [net_b]) model_b.compile(optimizer='sgd', loss=None) x = np.ones((1, 2)) loss = model_b.evaluate(x) assert 
loss == 4 def test_shared_layer_depth_is_correct(): # Basic outline here: we have a shared embedding layer, and two inputs that # go through different depths of computation in the graph before # the final output. We need the computed depth of the input layers to be # the same, because they both pass through the embedding layer before anything # else happens. That's what we're testing. from keras.layers import Embedding, Input, Dense, Concatenate from keras.models import Model input1 = Input(shape=(10,), name='input1') input2 = Input(shape=(10,), name='input2') embedding_layer = Embedding(name='embedding', input_dim=5, output_dim=10) embedded_input1 = embedding_layer(input1) embedded_input2 = embedding_layer(input2) transformed_input2 = Dense(6)(Dense(5)(Dense(3)(embedded_input2))) final_output = Dense(2)(Concatenate()([embedded_input1, transformed_input2])) model = Model(inputs=[input1, input2], outputs=final_output) input1_depth = -1 input2_depth = -1 for depth, layers in model._layers_by_depth.items(): for layer in layers: if layer.name == 'input1': input1_depth = depth if layer.name == 'input2': input2_depth = depth assert input1_depth != -1 assert input1_depth == input2_depth def test_layer_sharing_at_heterogeneous_depth(): x_val = np.random.random((10, 5)) x = Input(shape=(5,)) A = Dense(5, name='A') B = Dense(5, name='B') output = A(B(A(B(x)))) M = Model(x, output) output_val = M.predict(x_val) config = M.get_config() weights = M.get_weights() M2 = Model.from_config(config) M2.set_weights(weights) output_val_2 = M2.predict(x_val) np.testing.assert_allclose(output_val, output_val_2, atol=1e-6) def test_layer_sharing_at_heterogeneous_depth_with_concat(): input_shape = (16, 9, 3) input_layer = Input(shape=input_shape) A = Dense(3, name='dense_A') B = Dense(3, name='dense_B') C = Dense(3, name='dense_C') x1 = B(A(input_layer)) x2 = A(C(input_layer)) output = layers.concatenate([x1, x2]) M = Model(inputs=input_layer, outputs=output) x_val = np.random.random((10, 
16, 9, 3)) output_val = M.predict(x_val) config = M.get_config() weights = M.get_weights() M2 = Model.from_config(config) M2.set_weights(weights) output_val_2 = M2.predict(x_val) np.testing.assert_allclose(output_val, output_val_2, atol=1e-6) def test_multi_output_mask(): """Fixes #7589""" class TestMultiOutputLayer(Layer): def __init__(self, **kwargs): super(TestMultiOutputLayer, self).__init__(**kwargs) def call(self, inputs, **kwargs): return [K.abs(inputs), K.abs(inputs)] def compute_output_shape(self, input_shape): out_shape = super(TestMultiOutputLayer, self).compute_output_shape( input_shape) return [out_shape, out_shape] class TestMultiInputLayer(Layer): def __init__(self, **kwargs): super(TestMultiInputLayer, self).__init__(**kwargs) def call(self, inputs, **kwargs): negative, positive = inputs return negative + positive input_layer = Input(shape=(16, 16, 3)) x, y = TestMultiOutputLayer()(input_layer) z = TestMultiInputLayer()([x, y]) _ = Model(inputs=input_layer, outputs=z) assert K.int_shape(z)[1:] == (16, 16, 3) def test_constant_initializer_with_numpy(): model = Sequential() model.add(Dense(2, input_shape=(3,), kernel_initializer=Constant(np.ones((3, 2))))) model.add(Dense(3)) model.compile(loss='mse', optimizer='sgd', metrics=['acc']) json_str = model.to_json() model_from_json(json_str).summary() yaml_str = model.to_yaml() model_from_yaml(yaml_str).summary() if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/conftest.py0000644000000000116100000000054613354530144015671 0ustar rooteng00000000000000import pytest from keras import backend as K @pytest.fixture(autouse=True) def clear_session_after_test(): """Test wrapper to clean up after TensorFlow and CNTK tests. This wrapper runs for all the tests in the keras test suite. 
""" yield if K.backend() == 'tensorflow' or K.backend() == 'cntk': K.clear_session() Keras-2.2.4/tests/test_documentation.py0000644000000000116100000001332113342055016017744 0ustar rooteng00000000000000import importlib import inspect import re import sys from itertools import compress import pytest modules = ['keras.layers', 'keras.models', 'keras', 'keras.backend.tensorflow_backend', 'keras.engine', 'keras.wrappers', 'keras.utils', 'keras.callbacks', 'keras.activations', 'keras.losses', 'keras.models', 'keras.optimizers'] accepted_name = ['from_config'] accepted_module = ['keras.legacy.layers', 'keras.utils.generic_utils'] # Functions or classes with less than 'MIN_CODE_SIZE' lines can be ignored MIN_CODE_SIZE = 10 def handle_class(name, member): if is_accepted(name, member): return if member.__doc__ is None and not member_too_small(member): raise ValueError("{} class doesn't have any documentation".format(name), member.__module__, inspect.getmodule(member).__file__) for n, met in inspect.getmembers(member): if inspect.ismethod(met): handle_method(n, met) def handle_function(name, member): if is_accepted(name, member) or member_too_small(member): # We don't need to check this one. 
return doc = member.__doc__ if doc is None: raise ValueError("{} function doesn't have any documentation".format(name), member.__module__, inspect.getmodule(member).__file__) args = list(inspect.signature(member).parameters.keys()) assert_args_presence(args, doc, member, name) assert_function_style(name, member, doc, args) assert_doc_style(name, member, doc) def assert_doc_style(name, member, doc): lines = doc.split("\n") first_line = lines[0] if len(first_line.strip()) == 0: raise ValueError( "{} the documentation should be on the first line.".format(name), member.__module__) if first_line.strip()[-1] != '.': raise ValueError("{} first line should end with a '.'".format(name), member.__module__) def assert_function_style(name, member, doc, args): code = inspect.getsource(member) has_return = re.findall(r"\s*return \S+", code, re.MULTILINE) if has_return and "# Returns" not in doc: innerfunction = [inspect.getsource(x) for x in member.__code__.co_consts if inspect.iscode(x)] return_in_sub = [ret for code_inner in innerfunction for ret in re.findall(r"\s*return \S+", code_inner, re.MULTILINE)] if len(return_in_sub) < len(has_return): raise ValueError("{} needs a '# Returns' section".format(name), member.__module__) has_raise = re.findall(r"^\s*raise \S+", code, re.MULTILINE) if has_raise and "# Raises" not in doc: innerfunction = [inspect.getsource(x) for x in member.__code__.co_consts if inspect.iscode(x)] raise_in_sub = [ret for code_inner in innerfunction for ret in re.findall(r"\s*raise \S+", code_inner, re.MULTILINE)] if len(raise_in_sub) < len(has_raise): raise ValueError("{} needs a '# Raises' section".format(name), member.__module__) if len(args) > 0 and "# Arguments" not in doc: raise ValueError("{} needs a '# Arguments' section".format(name), member.__module__) assert_blank_before(name, member, doc, ['# Arguments', '# Raises', '# Returns']) def assert_blank_before(name, member, doc, keywords): doc_lines = [x.strip() for x in doc.split('\n')] for keyword in 
keywords: if keyword in doc_lines: index = doc_lines.index(keyword) if doc_lines[index - 1] != '': raise ValueError( "{} '{}' should have a blank line above.".format(name, keyword), member.__module__) def is_accepted(name, member): if 'keras' not in str(member.__module__): return True return name in accepted_name or member.__module__ in accepted_module def member_too_small(member): code = inspect.getsource(member).split('\n') return len(code) < MIN_CODE_SIZE def assert_args_presence(args, doc, member, name): args_not_in_doc = [arg not in doc for arg in args] if any(args_not_in_doc): raise ValueError( "{} {} arguments are not present in documentation ".format(name, list( compress(args, args_not_in_doc))), member.__module__) words = doc.replace('*', '').split() # Check arguments styling styles = [arg + ":" not in words for arg in args] if any(styles): raise ValueError( "{} {} are not style properly 'argument': documentation".format( name, list(compress(args, styles))), member.__module__) # Check arguments order indexes = [words.index(arg + ":") for arg in args] if indexes != sorted(indexes): raise ValueError( "{} arguments order is different from the documentation".format(name), member.__module__) def handle_method(name, member): if name in accepted_name or member.__module__ in accepted_module: return handle_function(name, member) def handle_module(mod): for name, mem in inspect.getmembers(mod): if inspect.isclass(mem): handle_class(name, mem) elif inspect.isfunction(mem): handle_function(name, mem) elif 'keras' in name and inspect.ismodule(mem): # Only test keras' modules handle_module(mem) @pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3") def test_doc(): for module in modules: mod = importlib.import_module(module) handle_module(mod) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/test_dynamic_trainability.py0000644000000000116100000000654113354530144021303 0ustar rooteng00000000000000from __future__ import 
absolute_import from __future__ import print_function import pytest from keras.models import Model, Sequential from keras.layers import Dense, Input def test_layer_trainability_switch(): # with constructor argument, in Sequential model = Sequential() model.add(Dense(2, trainable=False, input_dim=1)) assert model.trainable_weights == [] # by setting the `trainable` argument, in Sequential model = Sequential() layer = Dense(2, input_dim=1) model.add(layer) assert model.trainable_weights == layer.trainable_weights layer.trainable = False assert model.trainable_weights == [] # with constructor argument, in Model x = Input(shape=(1,)) y = Dense(2, trainable=False)(x) model = Model(x, y) assert model.trainable_weights == [] # by setting the `trainable` argument, in Model x = Input(shape=(1,)) layer = Dense(2) y = layer(x) model = Model(x, y) assert model.trainable_weights == layer.trainable_weights layer.trainable = False assert model.trainable_weights == [] def test_model_trainability_switch(): # a non-trainable model has no trainable weights x = Input(shape=(1,)) y = Dense(2)(x) model = Model(x, y) model.trainable = False assert model.trainable_weights == [] # same for Sequential model = Sequential() model.add(Dense(2, input_dim=1)) model.trainable = False assert model.trainable_weights == [] def test_nested_model_trainability(): # a Sequential inside a Model inner_model = Sequential() inner_model.add(Dense(2, input_dim=1)) x = Input(shape=(1,)) y = inner_model(x) outer_model = Model(x, y) assert outer_model.trainable_weights == inner_model.trainable_weights inner_model.trainable = False assert outer_model.trainable_weights == [] inner_model.trainable = True inner_model.layers[-1].trainable = False assert outer_model.trainable_weights == [] # a Sequential inside a Sequential inner_model = Sequential() inner_model.add(Dense(2, input_dim=1)) outer_model = Sequential() outer_model.add(inner_model) assert outer_model.trainable_weights == inner_model.trainable_weights 
inner_model.trainable = False assert outer_model.trainable_weights == [] inner_model.trainable = True inner_model.layers[-1].trainable = False assert outer_model.trainable_weights == [] # a Model inside a Model x = Input(shape=(1,)) y = Dense(2)(x) inner_model = Model(x, y) x = Input(shape=(1,)) y = inner_model(x) outer_model = Model(x, y) assert outer_model.trainable_weights == inner_model.trainable_weights inner_model.trainable = False assert outer_model.trainable_weights == [] inner_model.trainable = True inner_model.layers[-1].trainable = False assert outer_model.trainable_weights == [] # a Model inside a Sequential x = Input(shape=(1,)) y = Dense(2)(x) inner_model = Model(x, y) outer_model = Sequential() outer_model.add(inner_model) assert outer_model.trainable_weights == inner_model.trainable_weights inner_model.trainable = False assert outer_model.trainable_weights == [] inner_model.trainable = True inner_model.layers[-1].trainable = False assert outer_model.trainable_weights == [] if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/integration_tests/0000755000000000116100000000000013355226624017240 5ustar rooteng00000000000000Keras-2.2.4/tests/integration_tests/test_tensorflow_integration.py0000644000000000116100000000271313354530144025453 0ustar rooteng00000000000000from __future__ import print_function import os import tempfile import pytest import keras from keras import layers from keras.utils.test_utils import get_test_data @pytest.mark.skipif(keras.backend.backend() != 'tensorflow', reason='Requires TF backend') def test_tf_optimizer(): import tensorflow as tf num_hidden = 10 output_dim = 2 input_dim = 10 target = 0.8 optimizer = tf.train.AdadeltaOptimizer( learning_rate=1., rho=0.95, epsilon=1e-08) (x_train, y_train), (x_test, y_test) = get_test_data( num_train=1000, num_test=200, input_shape=(input_dim,), classification=True, num_classes=output_dim) model = keras.Sequential() model.add(layers.Dense(num_hidden, activation='relu', 
input_shape=(input_dim,))) model.add(layers.Dense(output_dim, activation='softmax')) model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=8, batch_size=16, validation_data=(x_test, y_test), verbose=2) assert history.history['val_acc'][-1] >= target # Test saving. _, fname = tempfile.mkstemp('.h5') model.save(fname) model = keras.models.load_model(fname) assert len(model.weights) == 4 os.remove(fname) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/integration_tests/test_datasets.py0000644000000000116100000000617613247612467022477 0ustar rooteng00000000000000from __future__ import print_function import pytest import time import random from keras.datasets import cifar10 from keras.datasets import cifar100 from keras.datasets import reuters from keras.datasets import imdb from keras.datasets import mnist from keras.datasets import boston_housing from keras.datasets import fashion_mnist def test_cifar(): # only run data download tests 20% of the time # to speed up frequent testing random.seed(time.time()) if random.random() > 0.8: (x_train, y_train), (x_test, y_test) = cifar10.load_data() assert len(x_train) == len(y_train) == 50000 assert len(x_test) == len(y_test) == 10000 (x_train, y_train), (x_test, y_test) = cifar100.load_data('fine') assert len(x_train) == len(y_train) == 50000 assert len(x_test) == len(y_test) == 10000 (x_train, y_train), (x_test, y_test) = cifar100.load_data('coarse') assert len(x_train) == len(y_train) == 50000 assert len(x_test) == len(y_test) == 10000 def test_reuters(): # only run data download tests 20% of the time # to speed up frequent testing random.seed(time.time()) if random.random() > 0.8: (x_train, y_train), (x_test, y_test) = reuters.load_data() assert len(x_train) == len(y_train) assert len(x_test) == len(y_test) assert len(x_train) + len(x_test) == 11228 (x_train, y_train), (x_test, y_test) = reuters.load_data(maxlen=10) 
assert len(x_train) == len(y_train) assert len(x_test) == len(y_test) word_index = reuters.get_word_index() assert isinstance(word_index, dict) def test_mnist(): # only run data download tests 20% of the time # to speed up frequent testing random.seed(time.time()) if random.random() > 0.8: (x_train, y_train), (x_test, y_test) = mnist.load_data() assert len(x_train) == len(y_train) == 60000 assert len(x_test) == len(y_test) == 10000 def test_imdb(): # only run data download tests 20% of the time # to speed up frequent testing random.seed(time.time()) if random.random() > 0.8: (x_train, y_train), (x_test, y_test) = imdb.load_data() (x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=40) assert len(x_train) == len(y_train) assert len(x_test) == len(y_test) word_index = imdb.get_word_index() assert isinstance(word_index, dict) def test_boston_housing(): # only run data download tests 20% of the time # to speed up frequent testing random.seed(time.time()) if random.random() > 0.8: (x_train, y_train), (x_test, y_test) = boston_housing.load_data() assert len(x_train) == len(y_train) assert len(x_test) == len(y_test) def test_fashion_mnist(): # only run data download tests 20% of the time # to speed up frequent testing random.seed(time.time()) if random.random() > 0.8: (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() assert len(x_train) == len(y_train) == 60000 assert len(x_test) == len(y_test) == 10000 if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/integration_tests/imagenet_utils_test.py0000644000000000116100000001052113326715636023665 0ustar rooteng00000000000000import pytest import numpy as np from numpy.testing import assert_allclose from keras.applications import imagenet_utils as utils from keras.models import Model from keras.layers import Input, Lambda def test_preprocess_input(): # Test image batch with float and int image input x = np.random.uniform(0, 255, (2, 10, 10, 3)) xint = x.astype('int32') assert 
utils.preprocess_input(x).shape == x.shape assert utils.preprocess_input(xint).shape == xint.shape out1 = utils.preprocess_input(x, 'channels_last') out1int = utils.preprocess_input(xint, 'channels_last') out2 = utils.preprocess_input(np.transpose(x, (0, 3, 1, 2)), 'channels_first') out2int = utils.preprocess_input(np.transpose(xint, (0, 3, 1, 2)), 'channels_first') assert_allclose(out1, out2.transpose(0, 2, 3, 1)) assert_allclose(out1int, out2int.transpose(0, 2, 3, 1)) # Test single image x = np.random.uniform(0, 255, (10, 10, 3)) xint = x.astype('int32') assert utils.preprocess_input(x).shape == x.shape assert utils.preprocess_input(xint).shape == xint.shape out1 = utils.preprocess_input(x, 'channels_last') out1int = utils.preprocess_input(xint, 'channels_last') out2 = utils.preprocess_input(np.transpose(x, (2, 0, 1)), 'channels_first') out2int = utils.preprocess_input(np.transpose(xint, (2, 0, 1)), 'channels_first') assert_allclose(out1, out2.transpose(1, 2, 0)) assert_allclose(out1int, out2int.transpose(1, 2, 0)) # Test that writing over the input data works predictably for mode in ['torch', 'tf']: x = np.random.uniform(0, 255, (2, 10, 10, 3)) xint = x.astype('int') x2 = utils.preprocess_input(x, mode=mode) xint2 = utils.preprocess_input(xint) assert_allclose(x, x2) assert xint.astype('float').max() != xint2.max() # Caffe mode works differently from the others x = np.random.uniform(0, 255, (2, 10, 10, 3)) xint = x.astype('int') x2 = utils.preprocess_input(x, data_format='channels_last', mode='caffe') xint2 = utils.preprocess_input(xint) assert_allclose(x, x2[..., ::-1]) assert xint.astype('float').max() != xint2.max() def test_preprocess_input_symbolic(): # Test image batch x = np.random.uniform(0, 255, (2, 10, 10, 3)) inputs = Input(shape=x.shape[1:]) outputs = Lambda(utils.preprocess_input, output_shape=x.shape[1:])(inputs) model = Model(inputs, outputs) assert model.predict(x).shape == x.shape outputs1 = Lambda(lambda x: utils.preprocess_input(x, 
'channels_last'), output_shape=x.shape[1:])(inputs) model1 = Model(inputs, outputs1) out1 = model1.predict(x) x2 = np.transpose(x, (0, 3, 1, 2)) inputs2 = Input(shape=x2.shape[1:]) outputs2 = Lambda(lambda x: utils.preprocess_input(x, 'channels_first'), output_shape=x2.shape[1:])(inputs2) model2 = Model(inputs2, outputs2) out2 = model2.predict(x2) assert_allclose(out1, out2.transpose(0, 2, 3, 1)) # Test single image x = np.random.uniform(0, 255, (10, 10, 3)) inputs = Input(shape=x.shape) outputs = Lambda(utils.preprocess_input, output_shape=x.shape)(inputs) model = Model(inputs, outputs) assert model.predict(x[np.newaxis])[0].shape == x.shape outputs1 = Lambda(lambda x: utils.preprocess_input(x, 'channels_last'), output_shape=x.shape)(inputs) model1 = Model(inputs, outputs1) out1 = model1.predict(x[np.newaxis])[0] x2 = np.transpose(x, (2, 0, 1)) inputs2 = Input(shape=x2.shape) outputs2 = Lambda(lambda x: utils.preprocess_input(x, 'channels_first'), output_shape=x2.shape)(inputs2) model2 = Model(inputs2, outputs2) out2 = model2.predict(x2[np.newaxis])[0] assert_allclose(out1, out2.transpose(1, 2, 0)) def test_decode_predictions(): x = np.zeros((2, 1000)) x[0, 372] = 1.0 x[1, 549] = 1.0 outs = utils.decode_predictions(x, top=1) scores = [out[0][2] for out in outs] assert scores[0] == scores[1] # the numbers of columns and ImageNet classes are not identical. 
with pytest.raises(ValueError): utils.decode_predictions(np.ones((2, 100))) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/integration_tests/test_vector_data_tasks.py0000644000000000116100000000713613354530144024352 0ustar rooteng00000000000000from __future__ import print_function import pytest from keras.utils.test_utils import get_test_data from keras.models import Sequential from keras import layers import keras from keras.utils.np_utils import to_categorical num_classes = 2 def test_vector_classification(): ''' Classify random float vectors into 2 classes with logistic regression using 2 layer neural network with ReLU hidden units. ''' (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, num_test=200, input_shape=(20,), classification=True, num_classes=num_classes) y_train = to_categorical(y_train) y_test = to_categorical(y_test) # Test with Sequential API model = Sequential([ layers.Dense(16, input_shape=(x_train.shape[-1],), activation='relu'), layers.Dense(8), layers.Activation('relu'), layers.Dense(num_classes, activation='softmax') ]) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) model.summary() history = model.fit(x_train, y_train, epochs=15, batch_size=16, validation_data=(x_test, y_test), verbose=0) assert(history.history['val_acc'][-1] > 0.8) config = model.get_config() model = Sequential.from_config(config) def test_vector_classification_functional(): (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, num_test=200, input_shape=(20,), classification=True, num_classes=num_classes) # Test with functional API inputs = layers.Input(shape=(x_train.shape[-1],)) x = layers.Dense(16, activation=keras.activations.relu)(inputs) x = layers.Dense(8)(x) x = layers.Activation('relu')(x) outputs = layers.Dense(num_classes, activation='softmax')(x) model = keras.models.Model(inputs, outputs) model.compile(loss=keras.losses.sparse_categorical_crossentropy, 
optimizer=keras.optimizers.RMSprop(), metrics=['acc']) history = model.fit(x_train, y_train, epochs=15, batch_size=16, validation_data=(x_test, y_test), verbose=0) assert(history.history['val_acc'][-1] > 0.8) def test_vector_regression(): ''' Perform float data prediction (regression) using 2 layer MLP with tanh and sigmoid activations. ''' (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, num_test=200, input_shape=(20,), output_shape=(num_classes,), classification=False) model = Sequential([ layers.Dense(16, input_shape=(x_train.shape[-1],), activation='tanh'), layers.Dense(num_classes) ]) model.compile(loss='hinge', optimizer='adagrad') history = model.fit(x_train, y_train, epochs=20, batch_size=16, validation_data=(x_test, y_test), verbose=0) assert (history.history['val_loss'][-1] < 0.9) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/integration_tests/test_temporal_data_tasks.py0000644000000000116100000002144513354530144024672 0ustar rooteng00000000000000from __future__ import print_function import numpy as np import pytest import string from keras.utils.test_utils import get_test_data from keras.utils.np_utils import to_categorical from keras.models import Sequential from keras import layers, optimizers import keras.backend as K import keras def test_temporal_classification(): ''' Classify temporal sequences of float numbers of length 3 into 2 classes using single layer of GRU units and softmax applied to the last activations of the units ''' np.random.seed(1337) (x_train, y_train), (x_test, y_test) = get_test_data(num_train=200, num_test=20, input_shape=(3, 4), classification=True, num_classes=2) y_train = to_categorical(y_train) y_test = to_categorical(y_test) model = Sequential() model.add(layers.GRU(8, input_shape=(x_train.shape[1], x_train.shape[2]))) model.add(layers.Dense(y_train.shape[-1], activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) 
model.summary() history = model.fit(x_train, y_train, epochs=4, batch_size=10, validation_data=(x_test, y_test), verbose=0) assert(history.history['acc'][-1] >= 0.8) config = model.get_config() model = Sequential.from_config(config) def test_temporal_classification_functional(): ''' Classify temporal sequences of float numbers of length 3 into 2 classes using single layer of GRU units and softmax applied to the last activations of the units ''' np.random.seed(1337) (x_train, y_train), (x_test, y_test) = get_test_data(num_train=200, num_test=20, input_shape=(3, 4), classification=True, num_classes=2) y_train = to_categorical(y_train) y_test = to_categorical(y_test) inputs = layers.Input(shape=(x_train.shape[1], x_train.shape[2])) x = layers.SimpleRNN(8)(inputs) outputs = layers.Dense(y_train.shape[-1], activation='softmax')(x) model = keras.models.Model(inputs, outputs) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=4, batch_size=10, validation_data=(x_test, y_test), verbose=0) assert(history.history['acc'][-1] >= 0.8) def test_temporal_regression(): ''' Predict float numbers (regression) based on sequences of float numbers of length 3 using a single layer of GRU units ''' np.random.seed(1337) (x_train, y_train), (x_test, y_test) = get_test_data(num_train=200, num_test=20, input_shape=(3, 5), output_shape=(2,), classification=False) model = Sequential() model.add(layers.LSTM(y_train.shape[-1], input_shape=(x_train.shape[1], x_train.shape[2]))) model.compile(loss='hinge', optimizer='adam') history = model.fit(x_train, y_train, epochs=5, batch_size=16, validation_data=(x_test, y_test), verbose=0) assert(history.history['loss'][-1] < 1.) def test_3d_to_3d(): ''' Apply a same Dense layer for each element of time dimension of the input and make predictions of the output sequence elements. 
This does not make use of the temporal structure of the sequence (see TimeDistributedDense for more details) ''' np.random.seed(1337) (x_train, y_train), (x_test, y_test) = get_test_data(num_train=100, num_test=20, input_shape=(3, 5), output_shape=(3, 5), classification=False) model = Sequential() model.add(layers.TimeDistributed( layers.Dense(y_train.shape[-1]), input_shape=x_train.shape[1:3])) model.compile(loss='hinge', optimizer='rmsprop') history = model.fit(x_train, y_train, epochs=20, batch_size=16, validation_data=(x_test, y_test), verbose=0) assert(history.history['loss'][-1] < 1.) def test_stacked_lstm_char_prediction(): ''' Learn alphabetical char sequence with stacked LSTM. Predict the whole alphabet based on the first two letters ('ab' -> 'ab...z') See non-toy example in examples/lstm_text_generation.py ''' # generate alphabet: # http://stackoverflow.com/questions/16060899/alphabet-range-python alphabet = string.ascii_lowercase number_of_chars = len(alphabet) # generate char sequences of length 'sequence_length' out of alphabet and # store the next char as label (e.g. 
'ab'->'c') sequence_length = 2 sentences = [alphabet[i: i + sequence_length] for i in range(len(alphabet) - sequence_length)] next_chars = [alphabet[i + sequence_length] for i in range(len(alphabet) - sequence_length)] # Transform sequences and labels into 'one-hot' encoding x = np.zeros((len(sentences), sequence_length, number_of_chars), dtype=np.bool) y = np.zeros((len(sentences), number_of_chars), dtype=np.bool) for i, sentence in enumerate(sentences): for t, char in enumerate(sentence): x[i, t, ord(char) - ord('a')] = 1 y[i, ord(next_chars[i]) - ord('a')] = 1 # learn the alphabet with stacked LSTM model = Sequential([ layers.LSTM(16, return_sequences=True, input_shape=(sequence_length, number_of_chars)), layers.LSTM(16, return_sequences=False), layers.Dense(number_of_chars, activation='softmax') ]) model.compile(loss='categorical_crossentropy', optimizer='adam') model.fit(x, y, batch_size=1, epochs=60, verbose=1) # prime the model with 'ab' sequence and let it generate the learned alphabet sentence = alphabet[:sequence_length] generated = sentence for iteration in range(number_of_chars - sequence_length): x = np.zeros((1, sequence_length, number_of_chars)) for t, char in enumerate(sentence): x[0, t, ord(char) - ord('a')] = 1. preds = model.predict(x, verbose=0)[0] next_char = chr(np.argmax(preds) + ord('a')) generated += next_char sentence = sentence[1:] + next_char # check that it did generate the alphabet correctly assert(generated == alphabet) def test_masked_temporal(): ''' Confirm that even with masking on both inputs and outputs, cross-entropies are of the expected scale. In this task, there are variable length inputs of integers from 1-9, and a random subset of unmasked outputs. Each of these outputs has a 50% probability of being the input number unchanged, and a 50% probability of being 2*input%10. 
The ground-truth best cross-entropy loss should, then be -log(0.5) = 0.69 ''' np.random.seed(1338) model = Sequential() model.add(layers.Embedding(10, 10, mask_zero=True)) model.add(layers.Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam') x = np.random.randint(1, 10, size=(20000, 10)) for rowi in range(x.shape[0]): padding = np.random.randint(0, x.shape[1] / 2 + 1) x[rowi, :padding] = 0 # 50% of the time the correct output is the input. # The other 50% of the time it's 2 * input % 10 y = (x * np.random.randint(1, 3, size=x.shape)) % 10 ys = np.zeros((y.size, 10), dtype='int32') for i, target in enumerate(y.flat): ys[i, target] = 1 ys = ys.reshape(y.shape + (10,)) history = model.fit(x, ys, validation_split=0.05, batch_size=10, verbose=0, epochs=3) ground_truth = -np.log(0.5) assert(np.abs(history.history['loss'][-1] - ground_truth) < 0.06) @pytest.mark.skipif(K.backend() != 'tensorflow', reason='Requires TF backend') def test_embedding_with_clipnorm(): model = Sequential() model.add(layers.Embedding(input_dim=1, output_dim=1)) model.compile(optimizer=optimizers.SGD(clipnorm=0.1), loss='mse') model.fit(np.array([[0]]), np.array([[[0.5]]]), epochs=1) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/integration_tests/preprocessing/0000755000000000116100000000000013355226624022123 5ustar rooteng00000000000000Keras-2.2.4/tests/integration_tests/preprocessing/sequence_test.py0000644000000000116100000002162513342055016025341 0ustar rooteng00000000000000from math import ceil import numpy as np from numpy.testing import assert_allclose, assert_raises import pytest from keras.preprocessing.sequence import pad_sequences from keras.preprocessing.sequence import make_sampling_table from keras.preprocessing.sequence import skipgrams from keras.preprocessing.sequence import _remove_long_seq from keras.preprocessing.sequence import TimeseriesGenerator def test_pad_sequences(): a = [[1], [1, 2], [1, 2, 3]] # test padding b = 
pad_sequences(a, maxlen=3, padding='pre') assert_allclose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) b = pad_sequences(a, maxlen=3, padding='post') assert_allclose(b, [[1, 0, 0], [1, 2, 0], [1, 2, 3]]) # test truncating b = pad_sequences(a, maxlen=2, truncating='pre') assert_allclose(b, [[0, 1], [1, 2], [2, 3]]) b = pad_sequences(a, maxlen=2, truncating='post') assert_allclose(b, [[0, 1], [1, 2], [1, 2]]) # test value b = pad_sequences(a, maxlen=3, value=1) assert_allclose(b, [[1, 1, 1], [1, 1, 2], [1, 2, 3]]) def test_pad_sequences_vector(): a = [[[1, 1]], [[2, 1], [2, 2]], [[3, 1], [3, 2], [3, 3]]] # test padding b = pad_sequences(a, maxlen=3, padding='pre') assert_allclose(b, [[[0, 0], [0, 0], [1, 1]], [[0, 0], [2, 1], [2, 2]], [[3, 1], [3, 2], [3, 3]]]) b = pad_sequences(a, maxlen=3, padding='post') assert_allclose(b, [[[1, 1], [0, 0], [0, 0]], [[2, 1], [2, 2], [0, 0]], [[3, 1], [3, 2], [3, 3]]]) # test truncating b = pad_sequences(a, maxlen=2, truncating='pre') assert_allclose(b, [[[0, 0], [1, 1]], [[2, 1], [2, 2]], [[3, 2], [3, 3]]]) b = pad_sequences(a, maxlen=2, truncating='post') assert_allclose(b, [[[0, 0], [1, 1]], [[2, 1], [2, 2]], [[3, 1], [3, 2]]]) # test value b = pad_sequences(a, maxlen=3, value=1) assert_allclose(b, [[[1, 1], [1, 1], [1, 1]], [[1, 1], [2, 1], [2, 2]], [[3, 1], [3, 2], [3, 3]]]) def test_make_sampling_table(): a = make_sampling_table(3) assert_allclose(a, np.asarray([0.00315225, 0.00315225, 0.00547597]), rtol=.1) def test_skipgrams(): # test with no window size and binary labels couples, labels = skipgrams(np.arange(3), vocabulary_size=3) for couple in couples: assert couple[0] in [0, 1, 2] and couple[1] in [0, 1, 2] # test window size and categorical labels couples, labels = skipgrams(np.arange(5), vocabulary_size=5, window_size=1, categorical=True) for couple in couples: assert couple[0] - couple[1] <= 3 for l in labels: assert len(l) == 2 def test_remove_long_seq(): maxlen = 5 seq = [ [1, 2, 3], [1, 2, 3, 4, 5, 6], ] label = ['a', 
'b'] new_seq, new_label = _remove_long_seq(maxlen, seq, label) assert new_seq == [[1, 2, 3]] assert new_label == ['a'] def test_TimeseriesGenerator(): data = np.array([[i] for i in range(50)]) targets = np.array([[i] for i in range(50)]) data_gen = TimeseriesGenerator(data, targets, length=10, sampling_rate=2, batch_size=2) assert len(data_gen) == 20 assert (np.allclose(data_gen[0][0], np.array([[[0], [2], [4], [6], [8]], [[1], [3], [5], [7], [9]]]))) assert (np.allclose(data_gen[0][1], np.array([[10], [11]]))) assert (np.allclose(data_gen[1][0], np.array([[[2], [4], [6], [8], [10]], [[3], [5], [7], [9], [11]]]))) assert (np.allclose(data_gen[1][1], np.array([[12], [13]]))) data_gen = TimeseriesGenerator(data, targets, length=10, sampling_rate=2, reverse=True, batch_size=2) assert len(data_gen) == 20 assert (np.allclose(data_gen[0][0], np.array([[[8], [6], [4], [2], [0]], [[9], [7], [5], [3], [1]]]))) assert (np.allclose(data_gen[0][1], np.array([[10], [11]]))) data_gen = TimeseriesGenerator(data, targets, length=10, sampling_rate=2, shuffle=True, batch_size=1) batch = data_gen[0] r = batch[1][0][0] assert (np.allclose(batch[0], np.array([[[r - 10], [r - 8], [r - 6], [r - 4], [r - 2]]]))) assert (np.allclose(batch[1], np.array([[r], ]))) data_gen = TimeseriesGenerator(data, targets, length=10, sampling_rate=2, stride=2, batch_size=2) assert len(data_gen) == 10 assert (np.allclose(data_gen[1][0], np.array([[[4], [6], [8], [10], [12]], [[6], [8], [10], [12], [14]]]))) assert (np.allclose(data_gen[1][1], np.array([[14], [16]]))) data_gen = TimeseriesGenerator(data, targets, length=10, sampling_rate=2, start_index=10, end_index=30, batch_size=2) assert len(data_gen) == 6 assert (np.allclose(data_gen[0][0], np.array([[[10], [12], [14], [16], [18]], [[11], [13], [15], [17], [19]]]))) assert (np.allclose(data_gen[0][1], np.array([[20], [21]]))) data = np.array([np.random.random_sample((1, 2, 3, 4)) for i in range(50)]) targets = np.array([np.random.random_sample((3, 2, 
1)) for i in range(50)]) data_gen = TimeseriesGenerator(data, targets, length=10, sampling_rate=2, start_index=10, end_index=30, batch_size=2) assert len(data_gen) == 6 assert np.allclose(data_gen[0][0], np.array( [np.array(data[10:19:2]), np.array(data[11:20:2])])) assert (np.allclose(data_gen[0][1], np.array([targets[20], targets[21]]))) with assert_raises(ValueError) as context: TimeseriesGenerator(data, targets, length=50) error = str(context.exception) assert '`start_index+length=50 > end_index=49` is disallowed' in error def test_TimeSeriesGenerator_doesnt_miss_any_sample(): x = np.array([[i] for i in range(10)]) for length in range(3, 10): g = TimeseriesGenerator(x, x, length=length, batch_size=1) expected = max(0, len(x) - length) actual = len(g) assert expected == actual if len(g) > 0: # All elements in range(length, 10) should be used as current step expected = np.arange(length, 10).reshape(-1, 1) y = np.concatenate([g[ix][1] for ix in range(len(g))], axis=0) assert_allclose(y, expected) x = np.array([[i] for i in range(23)]) strides = (1, 1, 5, 7, 3, 5, 3) lengths = (3, 3, 4, 3, 1, 3, 7) batch_sizes = (6, 6, 6, 5, 6, 6, 6) shuffles = (False, True, True, False, False, False, False) for stride, length, batch_size, shuffle in zip(strides, lengths, batch_sizes, shuffles): g = TimeseriesGenerator(x, x, length=length, sampling_rate=1, stride=stride, start_index=0, end_index=None, shuffle=shuffle, reverse=False, batch_size=batch_size) if shuffle: # all batches have the same size when shuffle is True. expected_sequences = ceil( (23 - length) / float(batch_size * stride)) * batch_size else: # last batch will be different if `(samples - length) / stride` # is not a multiple of `batch_size`. 
expected_sequences = ceil((23 - length) / float(stride)) expected_batches = ceil(expected_sequences / float(batch_size)) y = [g[ix][1] for ix in range(len(g))] actual_sequences = sum(len(_y) for _y in y) actual_batches = len(y) assert expected_sequences == actual_sequences assert expected_batches == actual_batches if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/integration_tests/preprocessing/text_test.py0000644000000000116100000000702113342055016024507 0ustar rooteng00000000000000# -*- coding: utf-8 -*- import numpy as np import pytest from keras.preprocessing.text import Tokenizer from keras.preprocessing.text import one_hot from keras.preprocessing.text import hashing_trick from keras.preprocessing.text import text_to_word_sequence def test_one_hot(): text = 'The cat sat on the mat.' encoded = one_hot(text, 5) assert len(encoded) == 6 assert np.max(encoded) <= 4 assert np.min(encoded) >= 0 def test_hashing_trick_hash(): text = 'The cat sat on the mat.' encoded = hashing_trick(text, 5) assert len(encoded) == 6 assert np.max(encoded) <= 4 assert np.min(encoded) >= 1 def test_hashing_trick_md5(): text = 'The cat sat on the mat.' 
encoded = hashing_trick(text, 5, hash_function='md5') assert len(encoded) == 6 assert np.max(encoded) <= 4 assert np.min(encoded) >= 1 def test_tokenizer(): texts = ['The cat sat on the mat.', 'The dog sat on the log.', 'Dogs and cats living together.'] tokenizer = Tokenizer(num_words=10) tokenizer.fit_on_texts(texts) sequences = [] for seq in tokenizer.texts_to_sequences_generator(texts): sequences.append(seq) assert np.max(np.max(sequences)) < 10 assert np.min(np.min(sequences)) == 1 tokenizer.fit_on_sequences(sequences) for mode in ['binary', 'count', 'tfidf', 'freq']: matrix = tokenizer.texts_to_matrix(texts, mode) def test_sequential_fit(): texts = ['The cat sat on the mat.', 'The dog sat on the log.', 'Dogs and cats living together.'] word_sequences = [ ['The', 'cat', 'is', 'sitting'], ['The', 'dog', 'is', 'standing'] ] tokenizer = Tokenizer() tokenizer.fit_on_texts(texts) tokenizer.fit_on_texts(word_sequences) assert tokenizer.document_count == 5 tokenizer.texts_to_matrix(texts) tokenizer.texts_to_matrix(word_sequences) def test_text_to_word_sequence(): text = 'hello! ? world!' assert text_to_word_sequence(text) == ['hello', 'world'] def test_text_to_word_sequence_multichar_split(): text = 'hello!stop?world!' assert text_to_word_sequence(text, split='stop') == ['hello', 'world'] def test_text_to_word_sequence_unicode(): text = u'ali! veli? 
kırk dokuz elli' assert (text_to_word_sequence(text) == [u'ali', u'veli', u'kırk', u'dokuz', u'elli']) def test_text_to_word_sequence_unicode_multichar_split(): text = u'ali!stopveli?stopkırkstopdokuzstopelli' assert (text_to_word_sequence(text, split='stop') == [u'ali', u'veli', u'kırk', u'dokuz', u'elli']) def test_tokenizer_unicode(): texts = [u'ali veli kırk dokuz elli', u'ali veli kırk dokuz elli veli kırk dokuz'] tokenizer = Tokenizer(num_words=5) tokenizer.fit_on_texts(texts) assert len(tokenizer.word_counts) == 5 def test_tokenizer_oov_flag(): """ Test of Out of Vocabulary (OOV) flag in Tokenizer """ x_train = ['This text has only known words'] x_test = ['This text has some unknown words'] # 2 OOVs: some, unknown # Default, without OOV flag tokenizer = Tokenizer() tokenizer.fit_on_texts(x_train) x_test_seq = tokenizer.texts_to_sequences(x_test) assert len(x_test_seq[0]) == 4 # discards 2 OOVs # With OOV feature tokenizer = Tokenizer(oov_token='') tokenizer.fit_on_texts(x_train) x_test_seq = tokenizer.texts_to_sequences(x_test) assert len(x_test_seq[0]) == 6 # OOVs marked in place if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/integration_tests/preprocessing/image_test.py0000644000000000116100000006126113354530144024616 0ustar rooteng00000000000000import pytest from keras.preprocessing import image from PIL import Image import numpy as np import os import tempfile import shutil class TestImage(object): def setup_class(cls): cls.img_w = cls.img_h = 20 rgb_images = [] gray_images = [] for n in range(8): bias = np.random.rand(cls.img_w, cls.img_h, 1) * 64 variance = np.random.rand(cls.img_w, cls.img_h, 1) * (255 - 64) imarray = np.random.rand(cls.img_w, cls.img_h, 3) * variance + bias im = Image.fromarray(imarray.astype('uint8')).convert('RGB') rgb_images.append(im) imarray = np.random.rand(cls.img_w, cls.img_h, 1) * variance + bias im = Image.fromarray(imarray.astype('uint8').squeeze()).convert('L') gray_images.append(im) 
cls.all_test_images = [rgb_images, gray_images] def teardown_class(cls): del cls.all_test_images def test_image_data_generator(self, tmpdir): for test_images in self.all_test_images: img_list = [] for im in test_images: img_list.append(image.img_to_array(im)[None, ...]) images = np.vstack(img_list) generator = image.ImageDataGenerator( featurewise_center=True, samplewise_center=True, featurewise_std_normalization=True, samplewise_std_normalization=True, zca_whitening=True, rotation_range=90., width_shift_range=0.1, height_shift_range=0.1, shear_range=0.5, zoom_range=0.2, channel_shift_range=0., brightness_range=(1, 5), fill_mode='nearest', cval=0.5, horizontal_flip=True, vertical_flip=True) generator.fit(images, augment=True) num_samples = images.shape[0] for x, y in generator.flow(images, np.arange(num_samples), shuffle=False, save_to_dir=str(tmpdir), batch_size=3): assert x.shape == images[:3].shape assert list(y) == [0, 1, 2] break # Test with sample weights for x, y, w in generator.flow(images, np.arange(num_samples), shuffle=False, sample_weight=np.arange(num_samples) + 1, save_to_dir=str(tmpdir), batch_size=3): assert x.shape == images[:3].shape assert list(y) == [0, 1, 2] assert list(w) == [1, 2, 3] break # Test with `shuffle=True` for x, y in generator.flow(images, np.arange(num_samples), shuffle=True, save_to_dir=str(tmpdir), batch_size=3): assert x.shape == images[:3].shape # Check that the sequence is shuffled. assert list(y) != [0, 1, 2] break # Test without y for x in generator.flow(images, None, shuffle=True, save_to_dir=str(tmpdir), batch_size=3): assert type(x) is np.ndarray assert x.shape == images[:3].shape # Check that the sequence is shuffled. 
break # Test with a single miscellaneous input data array dsize = images.shape[0] x_misc1 = np.random.random(dsize) for i, (x, y) in enumerate(generator.flow((images, x_misc1), np.arange(dsize), shuffle=False, batch_size=2)): assert x[0].shape == images[:2].shape assert (x[1] == x_misc1[(i * 2):((i + 1) * 2)]).all() if i == 2: break # Test with two miscellaneous inputs x_misc2 = np.random.random((dsize, 3, 3)) for i, (x, y) in enumerate(generator.flow((images, [x_misc1, x_misc2]), np.arange(dsize), shuffle=False, batch_size=2)): assert x[0].shape == images[:2].shape assert (x[1] == x_misc1[(i * 2):((i + 1) * 2)]).all() assert (x[2] == x_misc2[(i * 2):((i + 1) * 2)]).all() if i == 2: break # Test cases with `y = None` x = generator.flow(images, None, batch_size=3).next() assert type(x) is np.ndarray assert x.shape == images[:3].shape x = generator.flow((images, x_misc1), None, batch_size=3, shuffle=False).next() assert type(x) is list assert x[0].shape == images[:3].shape assert (x[1] == x_misc1[:3]).all() x = generator.flow((images, [x_misc1, x_misc2]), None, batch_size=3, shuffle=False).next() assert type(x) is list assert x[0].shape == images[:3].shape assert (x[1] == x_misc1[:3]).all() assert (x[2] == x_misc2[:3]).all() # Test some failure cases: x_misc_err = np.random.random((dsize + 1, 3, 3)) with pytest.raises(ValueError) as e_info: generator.flow((images, x_misc_err), np.arange(dsize), batch_size=3) assert 'All of the arrays in' in str(e_info.value) with pytest.raises(ValueError) as e_info: generator.flow((images, x_misc1), np.arange(dsize + 1), batch_size=3) assert '`x` (images tensor) and `y` (labels) ' in str(e_info.value) # Test `flow` behavior as Sequence seq = generator.flow(images, np.arange(images.shape[0]), shuffle=False, save_to_dir=str(tmpdir), batch_size=3) assert len(seq) == images.shape[0] // 3 + 1 x, y = seq[0] assert x.shape == images[:3].shape assert list(y) == [0, 1, 2] # Test with `shuffle=True` seq = generator.flow(images, 
np.arange(images.shape[0]), shuffle=True, save_to_dir=str(tmpdir), batch_size=3, seed=123) x, y = seq[0] # Check that the sequence is shuffled. assert list(y) != [0, 1, 2] # `on_epoch_end` should reshuffle the sequence. seq.on_epoch_end() x2, y2 = seq[0] assert list(y) != list(y2) def test_image_data_generator_with_split_value_error(self): with pytest.raises(ValueError): generator = image.ImageDataGenerator(validation_split=5) def test_image_data_generator_invalid_data(self): generator = image.ImageDataGenerator( featurewise_center=True, samplewise_center=True, featurewise_std_normalization=True, samplewise_std_normalization=True, zca_whitening=True, data_format='channels_last') # Test fit with invalid data with pytest.raises(ValueError): x = np.random.random((3, 10, 10)) generator.fit(x) # Test flow with invalid data with pytest.raises(ValueError): x = np.random.random((32, 10, 10)) generator.flow(np.arange(x.shape[0])) def test_image_data_generator_fit(self): generator = image.ImageDataGenerator( featurewise_center=True, samplewise_center=True, featurewise_std_normalization=True, samplewise_std_normalization=True, zca_whitening=True, zoom_range=(0.2, 0.2), data_format='channels_last') # Test grayscale x = np.random.random((32, 10, 10, 1)) generator.fit(x) # Test RBG x = np.random.random((32, 10, 10, 3)) generator.fit(x) # Test more samples than dims x = np.random.random((32, 4, 4, 1)) generator.fit(x) generator = image.ImageDataGenerator( featurewise_center=True, samplewise_center=True, featurewise_std_normalization=True, samplewise_std_normalization=True, zca_whitening=True, data_format='channels_first') # Test grayscale x = np.random.random((32, 1, 10, 10)) generator.fit(x) # Test RBG x = np.random.random((32, 3, 10, 10)) generator.fit(x) # Test more samples than dims x = np.random.random((32, 1, 4, 4)) generator.fit(x) def test_directory_iterator(self, tmpdir): num_classes = 2 # create folders and subfolders paths = [] for cl in range(num_classes): 
class_directory = 'class-{}'.format(cl) classpaths = [ class_directory, os.path.join(class_directory, 'subfolder-1'), os.path.join(class_directory, 'subfolder-2'), os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') ] for path in classpaths: tmpdir.join(path).mkdir() paths.append(classpaths) # save the images in the paths count = 0 filenames = [] for test_images in self.all_test_images: for im in test_images: # rotate image class im_class = count % num_classes # rotate subfolders classpaths = paths[im_class] filename = os.path.join(classpaths[count % len(classpaths)], 'image-{}.jpg'.format(count)) filenames.append(filename) im.save(str(tmpdir / filename)) count += 1 # create iterator generator = image.ImageDataGenerator() dir_iterator = generator.flow_from_directory(str(tmpdir)) # check number of classes and images assert len(dir_iterator.class_indices) == num_classes assert len(dir_iterator.classes) == count assert set(dir_iterator.filenames) == set(filenames) # Test invalid use cases with pytest.raises(ValueError): generator.flow_from_directory(str(tmpdir), color_mode='cmyk') with pytest.raises(ValueError): generator.flow_from_directory(str(tmpdir), class_mode='output') def preprocessing_function(x): """This will fail if not provided by a Numpy array. Note: This is made to enforce backward compatibility. 
""" assert x.shape == (26, 26, 3) assert type(x) is np.ndarray return np.zeros_like(x) # Test usage as Sequence generator = image.ImageDataGenerator( preprocessing_function=preprocessing_function) dir_seq = generator.flow_from_directory(str(tmpdir), target_size=(26, 26), color_mode='rgb', batch_size=3, class_mode='categorical') assert len(dir_seq) == count // 3 + 1 x1, y1 = dir_seq[1] assert x1.shape == (3, 26, 26, 3) assert y1.shape == (3, num_classes) x1, y1 = dir_seq[5] assert (x1 == 0).all() with pytest.raises(ValueError): x1, y1 = dir_seq[9] def test_directory_iterator_class_mode_input(self, tmpdir): tmpdir.join('class-1').mkdir() # save the images in the paths count = 0 for test_images in self.all_test_images: for im in test_images: filename = str(tmpdir / 'class-1' / 'image-{}.jpg'.format(count)) im.save(filename) count += 1 # create iterator generator = image.ImageDataGenerator() dir_iterator = generator.flow_from_directory(str(tmpdir), class_mode='input') batch = next(dir_iterator) # check if input and output have the same shape assert(batch[0].shape == batch[1].shape) # check if the input and output images are not the same numpy array input_img = batch[0][0] output_img = batch[1][0] output_img[0][0][0] += 1 assert(input_img[0][0][0] != output_img[0][0][0]) @pytest.mark.parametrize('validation_split,num_training', [ (0.25, 12), (0.40, 10), (0.50, 8), ]) def test_directory_iterator_with_validation_split(self, validation_split, num_training): num_classes = 2 tmp_folder = tempfile.mkdtemp(prefix='test_images') # create folders and subfolders paths = [] for cl in range(num_classes): class_directory = 'class-{}'.format(cl) classpaths = [ class_directory, os.path.join(class_directory, 'subfolder-1'), os.path.join(class_directory, 'subfolder-2'), os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') ] for path in classpaths: os.mkdir(os.path.join(tmp_folder, path)) paths.append(classpaths) # save the images in the paths count = 0 filenames = [] for 
test_images in self.all_test_images: for im in test_images: # rotate image class im_class = count % num_classes # rotate subfolders classpaths = paths[im_class] filename = os.path.join(classpaths[count % len(classpaths)], 'image-{}.jpg'.format(count)) filenames.append(filename) im.save(os.path.join(tmp_folder, filename)) count += 1 # create iterator generator = image.ImageDataGenerator(validation_split=validation_split) with pytest.raises(ValueError): generator.flow_from_directory(tmp_folder, subset='foo') train_iterator = generator.flow_from_directory(tmp_folder, subset='training') assert train_iterator.samples == num_training valid_iterator = generator.flow_from_directory(tmp_folder, subset='validation') assert valid_iterator.samples == count - num_training # check number of classes and images assert len(train_iterator.class_indices) == num_classes assert len(train_iterator.classes) == num_training assert len(set(train_iterator.filenames) & set(filenames)) == num_training shutil.rmtree(tmp_folder) def test_img_utils(self): height, width = 10, 8 # Test th data format x = np.random.random((3, height, width)) img = image.array_to_img(x, data_format='channels_first') assert img.size == (width, height) x = image.img_to_array(img, data_format='channels_first') assert x.shape == (3, height, width) # Test 2D x = np.random.random((1, height, width)) img = image.array_to_img(x, data_format='channels_first') assert img.size == (width, height) x = image.img_to_array(img, data_format='channels_first') assert x.shape == (1, height, width) # Test tf data format x = np.random.random((height, width, 3)) img = image.array_to_img(x, data_format='channels_last') assert img.size == (width, height) x = image.img_to_array(img, data_format='channels_last') assert x.shape == (height, width, 3) # Test 2D x = np.random.random((height, width, 1)) img = image.array_to_img(x, data_format='channels_last') assert img.size == (width, height) x = image.img_to_array(img, 
data_format='channels_last') assert x.shape == (height, width, 1) # Test invalid use case with pytest.raises(ValueError): x = np.random.random((height, width)) # not 3D img = image.array_to_img(x, data_format='channels_first') with pytest.raises(ValueError): # unknown data_format x = np.random.random((height, width, 3)) img = image.array_to_img(x, data_format='channels') with pytest.raises(ValueError): # neither RGB nor gray-scale x = np.random.random((height, width, 5)) img = image.array_to_img(x, data_format='channels_last') with pytest.raises(ValueError): # unknown data_format x = np.random.random((height, width, 3)) img = image.img_to_array(x, data_format='channels') with pytest.raises(ValueError): # neither RGB nor gray-scale x = np.random.random((height, width, 5, 3)) img = image.img_to_array(x, data_format='channels_last') def test_random_transforms(self): x = np.random.random((2, 28, 28)) assert image.random_rotation(x, 45).shape == (2, 28, 28) assert image.random_shift(x, 1, 1).shape == (2, 28, 28) assert image.random_shear(x, 20).shape == (2, 28, 28) assert image.random_zoom(x, (5, 5)).shape == (2, 28, 28) assert image.random_channel_shift(x, 20).shape == (2, 28, 28) # Test get_random_transform with predefined seed seed = 1 generator = image.ImageDataGenerator( rotation_range=90., width_shift_range=0.1, height_shift_range=0.1, shear_range=0.5, zoom_range=0.2, channel_shift_range=0.1, brightness_range=(1, 5), horizontal_flip=True, vertical_flip=True) transform_dict = generator.get_random_transform(x.shape, seed) transform_dict2 = generator.get_random_transform(x.shape, seed * 2) assert transform_dict['theta'] != 0 assert transform_dict['theta'] != transform_dict2['theta'] assert transform_dict['tx'] != 0 assert transform_dict['tx'] != transform_dict2['tx'] assert transform_dict['ty'] != 0 assert transform_dict['ty'] != transform_dict2['ty'] assert transform_dict['shear'] != 0 assert transform_dict['shear'] != transform_dict2['shear'] assert 
transform_dict['zx'] != 0 assert transform_dict['zx'] != transform_dict2['zx'] assert transform_dict['zy'] != 0 assert transform_dict['zy'] != transform_dict2['zy'] assert transform_dict['channel_shift_intensity'] != 0 assert (transform_dict['channel_shift_intensity'] != transform_dict2['channel_shift_intensity']) assert transform_dict['brightness'] != 0 assert transform_dict['brightness'] != transform_dict2['brightness'] # Test get_random_transform without any randomness generator = image.ImageDataGenerator() transform_dict = generator.get_random_transform(x.shape, seed) assert transform_dict['theta'] == 0 assert transform_dict['tx'] == 0 assert transform_dict['ty'] == 0 assert transform_dict['shear'] == 0 assert transform_dict['zx'] == 1 assert transform_dict['zy'] == 1 assert transform_dict['channel_shift_intensity'] is None assert transform_dict['brightness'] is None def test_deterministic_transform(self): x = np.ones((32, 32, 3)) generator = image.ImageDataGenerator( rotation_range=90, fill_mode='constant') x = np.random.random((32, 32, 3)) assert np.allclose(generator.apply_transform(x, {'flip_vertical': True}), x[::-1, :, :]) assert np.allclose(generator.apply_transform(x, {'flip_horizontal': True}), x[:, ::-1, :]) x = np.ones((3, 3, 3)) x_rotated = np.array([[[0., 0., 0.], [0., 0., 0.], [1., 1., 1.]], [[0., 0., 0.], [1., 1., 1.], [1., 1., 1.]], [[0., 0., 0.], [0., 0., 0.], [1., 1., 1.]]]) assert np.allclose(generator.apply_transform(x, {'theta': 45}), x_rotated) assert np.allclose(image.apply_affine_transform( x, theta=45, channel_axis=2, fill_mode='constant'), x_rotated) def test_batch_standardize(self): # ImageDataGenerator.standardize should work on batches for test_images in self.all_test_images: img_list = [] for im in test_images: img_list.append(image.img_to_array(im)[None, ...]) images = np.vstack(img_list) generator = image.ImageDataGenerator( featurewise_center=True, samplewise_center=True, featurewise_std_normalization=True, 
samplewise_std_normalization=True, zca_whitening=True, rotation_range=90., width_shift_range=0.1, height_shift_range=0.1, shear_range=0.5, zoom_range=0.2, channel_shift_range=0., brightness_range=(1, 5), fill_mode='nearest', cval=0.5, horizontal_flip=True, vertical_flip=True) generator.fit(images, augment=True) transformed = np.copy(images) for i, im in enumerate(transformed): transformed[i] = generator.random_transform(im) transformed = generator.standardize(transformed) def test_load_img(self, tmpdir): filename = str(tmpdir / 'image.png') original_im_array = np.array(255 * np.random.rand(100, 100, 3), dtype=np.uint8) original_im = image.array_to_img(original_im_array, scale=False) original_im.save(filename) # Test that loaded image is exactly equal to original. loaded_im = image.load_img(filename) loaded_im_array = image.img_to_array(loaded_im) assert loaded_im_array.shape == original_im_array.shape assert np.all(loaded_im_array == original_im_array) loaded_im = image.load_img(filename, grayscale=True) loaded_im_array = image.img_to_array(loaded_im) assert loaded_im_array.shape == (original_im_array.shape[0], original_im_array.shape[1], 1) # Test that nothing is changed when target size is equal to original. loaded_im = image.load_img(filename, target_size=(100, 100)) loaded_im_array = image.img_to_array(loaded_im) assert loaded_im_array.shape == original_im_array.shape assert np.all(loaded_im_array == original_im_array) loaded_im = image.load_img(filename, grayscale=True, target_size=(100, 100)) loaded_im_array = image.img_to_array(loaded_im) assert loaded_im_array.shape == (original_im_array.shape[0], original_im_array.shape[1], 1) # Test down-sampling with bilinear interpolation. 
loaded_im = image.load_img(filename, target_size=(25, 25)) loaded_im_array = image.img_to_array(loaded_im) assert loaded_im_array.shape == (25, 25, 3) loaded_im = image.load_img(filename, grayscale=True, target_size=(25, 25)) loaded_im_array = image.img_to_array(loaded_im) assert loaded_im_array.shape == (25, 25, 1) # Test down-sampling with nearest neighbor interpolation. loaded_im_nearest = image.load_img(filename, target_size=(25, 25), interpolation="nearest") loaded_im_array_nearest = image.img_to_array(loaded_im_nearest) assert loaded_im_array_nearest.shape == (25, 25, 3) assert np.any(loaded_im_array_nearest != loaded_im_array) # Check that exception is raised if interpolation not supported. loaded_im = image.load_img(filename, interpolation="unsupported") with pytest.raises(ValueError): loaded_im = image.load_img(filename, target_size=(25, 25), interpolation="unsupported") if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/integration_tests/test_image_data_tasks.py0000644000000000116100000000322613354530144024126 0ustar rooteng00000000000000from __future__ import print_function import numpy as np import pytest from keras.utils.test_utils import get_test_data from keras.models import Sequential from keras import layers from keras.utils.np_utils import to_categorical def test_image_classification(): np.random.seed(1337) input_shape = (16, 16, 3) (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, num_test=200, input_shape=input_shape, classification=True, num_classes=4) y_train = to_categorical(y_train) y_test = to_categorical(y_test) model = Sequential([ layers.Conv2D(filters=8, kernel_size=3, activation='relu', input_shape=input_shape), layers.MaxPooling2D(pool_size=2), layers.Conv2D(filters=4, kernel_size=(3, 3), activation='relu', padding='same'), layers.GlobalAveragePooling2D(), layers.Dense(y_test.shape[-1], activation='softmax') ]) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) 
model.summary() history = model.fit(x_train, y_train, epochs=10, batch_size=16, validation_data=(x_test, y_test), verbose=0) assert history.history['val_acc'][-1] > 0.75 config = model.get_config() model = Sequential.from_config(config) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/integration_tests/applications_test.py0000644000000000116100000000524013354530144023332 0ustar rooteng00000000000000import pytest import random import os from multiprocessing import Process, Queue from keras import applications from keras import backend as K pytestmark = pytest.mark.skipif( os.environ.get('CORE_CHANGED', 'True') == 'False' and os.environ.get('APP_CHANGED', 'True') == 'False', reason='Runs only when the relevant files have been modified.') MODEL_LIST = [ (applications.ResNet50, 2048), (applications.VGG16, 512), (applications.VGG19, 512), (applications.Xception, 2048), (applications.InceptionV3, 2048), (applications.InceptionResNetV2, 1536), (applications.MobileNet, 1024), (applications.MobileNetV2, 1280), (applications.DenseNet121, 1024), (applications.DenseNet169, 1664), (applications.DenseNet201, 1920), # Note that NASNetLarge is too heavy to test on Travis. (applications.NASNetMobile, 1056) ] def _get_output_shape(model_fn): if K.backend() == 'cntk': # Create model in a subprocess so that # the memory consumed by InceptionResNetV2 will be # released back to the system after this test # (to deal with OOM error on CNTK backend). # TODO: remove the use of multiprocessing from these tests # once a memory clearing mechanism # is implemented in the CNTK backend. def target(queue): model = model_fn() queue.put(model.output_shape) queue = Queue() p = Process(target=target, args=(queue,)) p.start() p.join() # The error in a subprocess won't propagate # to the main process, so we check if the model # is successfully created by checking if the output shape # has been put into the queue assert not queue.empty(), 'Model creation failed.' 
return queue.get_nowait() else: model = model_fn() return model.output_shape def _test_application_basic(app, last_dim=1000): output_shape = _get_output_shape(lambda: app(weights=None)) assert output_shape == (None, last_dim) def _test_application_notop(app, last_dim): output_shape = _get_output_shape( lambda: app(weights=None, include_top=False)) assert output_shape == (None, None, None, last_dim) def test_mobilenet_v2_legacy_import(): from keras.applications import mobilenetv2 assert hasattr(mobilenetv2, 'MobileNetV2') from keras.applications import mobilenet_v2 assert hasattr(mobilenet_v2, 'MobileNetV2') def test_applications(): for _ in range(3): app, last_dim = random.choice(MODEL_LIST) _test_application_basic(app) _test_application_notop(app, last_dim) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/test_loss_weighting.py0000644000000000116100000002577313354530144020141 0ustar rooteng00000000000000from __future__ import absolute_import from __future__ import print_function import pytest import numpy as np from keras import backend as K from keras.utils.test_utils import get_test_data from keras.models import Sequential, Model from keras.layers import Dense, Activation, GRU, TimeDistributed, Input from keras.utils import np_utils from numpy.testing import assert_almost_equal, assert_array_almost_equal num_classes = 10 batch_size = 128 epochs = 15 weighted_class = 5 high_weight = 10 train_samples = 5000 test_samples = 1000 timesteps = 3 input_dim = 10 loss = 'mse' loss_full_name = 'mean_squared_error' standard_weight = 1 standard_score_sequential = 0.5 decimal_precision = { 'cntk': 2, 'theano': 6, 'tensorflow': 6 } def _get_test_data(): np.random.seed(1337) (x_train, y_train), (x_test, y_test) = get_test_data(num_train=train_samples, num_test=test_samples, input_shape=(input_dim,), classification=True, num_classes=num_classes) int_y_test = y_test.copy() int_y_train = y_train.copy() # convert class vectors to binary class matrices y_train = 
np_utils.to_categorical(y_train, num_classes) y_test = np_utils.to_categorical(y_test, num_classes) test_ids = np.where(int_y_test == np.array(weighted_class))[0] class_weight = dict([(i, standard_weight) for i in range(num_classes)]) class_weight[weighted_class] = high_weight sample_weight = np.ones((y_train.shape[0])) * standard_weight sample_weight[int_y_train == weighted_class] = high_weight return ((x_train, y_train), (x_test, y_test), (sample_weight, class_weight, test_ids)) def create_sequential_model(): model = Sequential() model.add(Dense(32, input_shape=(input_dim,))) model.add(Activation('relu')) model.add(Dense(num_classes)) model.add(Activation('softmax')) return model def create_temporal_sequential_model(): model = Sequential() model.add(GRU(32, input_shape=(timesteps, input_dim), return_sequences=True)) model.add(TimeDistributed(Dense(num_classes))) model.add(Activation('softmax')) return model def test_sequential_class_weights(): model = create_sequential_model() model.compile(loss=loss, optimizer='rmsprop') ((x_train, y_train), (x_test, y_test), (sample_weight, class_weight, test_ids)) = _get_test_data() model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs // 3, verbose=0, class_weight=class_weight, validation_data=(x_train, y_train, sample_weight)) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs // 2, verbose=0, class_weight=class_weight) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs // 2, verbose=0, class_weight=class_weight, validation_split=0.1) model.train_on_batch(x_train[:32], y_train[:32], class_weight=class_weight) score = model.evaluate(x_test[test_ids, :], y_test[test_ids, :], verbose=0) assert(score < standard_score_sequential) def test_sequential_sample_weights(): model = create_sequential_model() model.compile(loss=loss, optimizer='rmsprop') ((x_train, y_train), (x_test, y_test), (sample_weight, class_weight, test_ids)) = _get_test_data() model.fit(x_train, y_train, 
batch_size=batch_size, epochs=epochs // 3, verbose=0, sample_weight=sample_weight) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs // 3, verbose=0, sample_weight=sample_weight, validation_split=0.1) model.train_on_batch(x_train[:32], y_train[:32], sample_weight=sample_weight[:32]) model.test_on_batch(x_train[:32], y_train[:32], sample_weight=sample_weight[:32]) score = model.evaluate(x_test[test_ids, :], y_test[test_ids, :], verbose=0) assert(score < standard_score_sequential) def test_sequential_temporal_sample_weights(): ((x_train, y_train), (x_test, y_test), (sample_weight, class_weight, test_ids)) = _get_test_data() temporal_x_train = np.reshape(x_train, (len(x_train), 1, x_train.shape[1])) temporal_x_train = np.repeat(temporal_x_train, timesteps, axis=1) temporal_x_test = np.reshape(x_test, (len(x_test), 1, x_test.shape[1])) temporal_x_test = np.repeat(temporal_x_test, timesteps, axis=1) temporal_y_train = np.reshape(y_train, (len(y_train), 1, y_train.shape[1])) temporal_y_train = np.repeat(temporal_y_train, timesteps, axis=1) temporal_y_test = np.reshape(y_test, (len(y_test), 1, y_test.shape[1])) temporal_y_test = np.repeat(temporal_y_test, timesteps, axis=1) temporal_sample_weight = np.reshape(sample_weight, (len(sample_weight), 1)) temporal_sample_weight = np.repeat(temporal_sample_weight, timesteps, axis=1) model = create_temporal_sequential_model() model.compile(loss=loss, optimizer='rmsprop', sample_weight_mode='temporal') model.fit(temporal_x_train, temporal_y_train, batch_size=batch_size, epochs=epochs // 3, verbose=0, sample_weight=temporal_sample_weight) model.fit(temporal_x_train, temporal_y_train, batch_size=batch_size, epochs=epochs // 3, verbose=0, sample_weight=temporal_sample_weight, validation_split=0.1) model.train_on_batch(temporal_x_train[:32], temporal_y_train[:32], sample_weight=temporal_sample_weight[:32]) model.test_on_batch(temporal_x_train[:32], temporal_y_train[:32], sample_weight=temporal_sample_weight[:32]) score = 
model.evaluate(temporal_x_test[test_ids], temporal_y_test[test_ids], verbose=0) assert(score < standard_score_sequential) def test_weighted_metrics_with_sample_weight(): decimal = decimal_precision[K.backend()] model = create_sequential_model() model.compile(loss=loss, optimizer='rmsprop', metrics=[loss], weighted_metrics=[loss]) ((x_train, y_train), (x_test, y_test), (sample_weight, class_weight, test_ids)) = _get_test_data() history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs // 3, verbose=0, sample_weight=sample_weight) h = history.history assert_array_almost_equal(h['loss'], h['weighted_' + loss_full_name], decimal=decimal) history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs // 3, verbose=0, sample_weight=sample_weight, validation_split=0.1) h = history.history assert_almost_equal(h['val_loss'], h['val_weighted_' + loss_full_name], decimal=decimal) model.train_on_batch(x_train[:32], y_train[:32], sample_weight=sample_weight[:32]) model.test_on_batch(x_train[:32], y_train[:32], sample_weight=sample_weight[:32]) test_sample_weight = np.ones((y_test.shape[0])) * standard_weight test_sample_weight[test_ids] = high_weight scores = model.evaluate(x_test, y_test, verbose=0, sample_weight=test_sample_weight) loss_score, metric_score, weighted_metric_score = scores assert loss_score < standard_score_sequential assert loss_score != metric_score assert_almost_equal(loss_score, weighted_metric_score, decimal=decimal) def test_weighted_metrics_with_no_sample_weight(): decimal = decimal_precision[K.backend()] model = create_sequential_model() model.compile(loss=loss, optimizer='rmsprop', metrics=[loss], weighted_metrics=[loss]) (x_train, y_train), (x_test, y_test), _ = _get_test_data() history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs // 3, verbose=0) h = history.history assert_array_almost_equal(h['loss'], h[loss_full_name], decimal=decimal) assert_array_almost_equal(h['loss'], h['weighted_' + 
loss_full_name], decimal=decimal) history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs // 3, verbose=0, validation_split=0.1) h = history.history assert_array_almost_equal(h['val_loss'], h['val_' + loss_full_name], decimal=decimal) assert_array_almost_equal(h['val_loss'], h['val_weighted_' + loss_full_name], decimal=decimal) model.train_on_batch(x_train[:32], y_train[:32]) model.test_on_batch(x_train[:32], y_train[:32]) scores = model.evaluate(x_test, y_test, verbose=0) loss_score, metric_score, weighted_metric_score = scores assert_almost_equal(loss_score, metric_score, decimal=decimal) assert_almost_equal(loss_score, weighted_metric_score, decimal=decimal) def test_weighted_metrics_with_weighted_accuracy_metric(): model = create_sequential_model() model.compile(loss=loss, optimizer='rmsprop', metrics=['acc'], weighted_metrics=['acc']) (x_train, y_train), _, (sample_weight, _, _) = _get_test_data() history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs // 3, verbose=0, sample_weight=sample_weight) assert history.history['acc'] != history.history['weighted_acc'] def test_weighted_metrics_with_multiple_outputs(): decimal = decimal_precision[K.backend()] inputs = Input(shape=(5,)) x = Dense(5)(inputs) output1 = Dense(1, name='output1')(x) output2 = Dense(1, name='output2')(x) model = Model(inputs=inputs, outputs=[output1, output2]) metrics = {'output1': [loss], 'output2': [loss]} weighted_metrics = {'output2': [loss]} loss_map = {'output1': loss, 'output2': loss} model.compile(loss=loss_map, optimizer='sgd', metrics=metrics, weighted_metrics=weighted_metrics) x = np.array([[1, 1, 1, 1, 1]]) y = {'output1': np.array([0]), 'output2': np.array([1])} weight = 5 history = model.fit(x, y, sample_weight={'output2': np.array([weight])}) unweighted_metric = history.history['output2_' + loss_full_name][0] weighted_metric = history.history['output2_weighted_' + loss_full_name][0] assert_almost_equal(unweighted_metric * weight, 
weighted_metric, decimal=decimal) def test_class_weight_wrong_classes(): model = create_sequential_model() model.compile(loss=loss, optimizer='rmsprop') ((x_train, y_train), (x_test, y_test), (sample_weight, class_weight, test_ids)) = _get_test_data() del class_weight[1] with pytest.raises(ValueError): model.fit(x_train, y_train, epochs=0, verbose=0, class_weight=class_weight) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/test_multiprocessing.py0000644000000000116100000011202713354530144020330 0ustar rooteng00000000000000from __future__ import print_function import os import threading import pytest import numpy as np from keras.models import Sequential from keras.layers.core import Dense from keras.utils import Sequence from keras import backend as K pytestmark = pytest.mark.skipif( K.backend() == 'tensorflow', reason='Temporarily disabled until the use_multiprocessing problem is solved') STEPS_PER_EPOCH = 100 STEPS = 100 WORKERS = 4 if K.backend() != 'tensorflow' else 2 class DummySequence(Sequence): def __getitem__(self, idx): return np.zeros([10, 2]), np.ones([10]) def __len__(self): return 10 class threadsafe_iter: """Takes an iterator/generator and makes it thread-safe by serializing call to the `next` method of given iterator/generator. """ def __init__(self, it): self.it = it self.lock = threading.Lock() def __iter__(self): return self def __next__(self): return self.next() def next(self): with self.lock: return next(self.it) def threadsafe_generator(f): """A decorator that takes a generator function and makes it thread-safe. """ def g(*a, **kw): return threadsafe_iter(f(*a, **kw)) return g @pytest.fixture def in_tmpdir(tmpdir): """Runs a function in a temporary directory. Checks that the directory is empty afterwards. 
""" with tmpdir.as_cwd(): yield None assert not tmpdir.listdir() def test_multiprocessing_training(): arr_data = np.random.randint(0, 256, (50, 2)) arr_labels = np.random.randint(0, 2, 50) arr_weights = np.random.random(50) @threadsafe_generator def custom_generator(use_weights=False): batch_size = 10 n_samples = 50 while True: batch_index = np.random.randint(0, n_samples - batch_size) start = batch_index end = start + batch_size X = arr_data[start: end] y = arr_labels[start: end] if use_weights: w = arr_weights[start: end] yield X, y, w else: yield X, y # Build a NN model = Sequential() model.add(Dense(1, input_shape=(2, ))) model.compile(loss='mse', optimizer='adadelta') # - Produce data on 4 worker processes, consume on main process: # - Each worker process runs OWN copy of generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `fit_generator()` raises ValueError # exception and does not attempt to run the generator. if os.name is 'nt': with pytest.raises(ValueError): model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) else: model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) # - Produce data on 4 worker threads, consume on main thread: # - All worker threads share the SAME generator model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=WORKERS, use_multiprocessing=False) # - Produce data on 1 worker process, consume on main process: # - Worker process runs generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `fit_generator()` raises ValueError # exception and does not attempt to run the generator. 
if os.name is 'nt': with pytest.raises(ValueError): model.fit_generator(custom_generator(True), steps_per_epoch=STEPS_PER_EPOCH, validation_data=(arr_data[:10], arr_labels[:10], arr_weights[:10]), validation_steps=1, max_queue_size=10, workers=1, use_multiprocessing=True) else: model.fit_generator(custom_generator(True), steps_per_epoch=STEPS_PER_EPOCH, validation_data=(arr_data[:10], arr_labels[:10], arr_weights[:10]), validation_steps=1, max_queue_size=10, workers=1, use_multiprocessing=True) # - Produce data on 1 worker thread, consume on main thread: # - Worker thread is the only thread running the generator model.fit_generator(custom_generator(True), steps_per_epoch=STEPS_PER_EPOCH, validation_data=(arr_data[:10], arr_labels[:10], arr_weights[:10]), validation_steps=1, max_queue_size=10, workers=1, use_multiprocessing=False) # - Produce data on 1 worker process, consume on main process: # - Worker process runs generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `fit_generator()` raises ValueError # exception and does not attempt to run the generator. 
if os.name is 'nt': with pytest.raises(ValueError): model.fit_generator(custom_generator(True), steps_per_epoch=STEPS_PER_EPOCH, validation_data=custom_generator(True), validation_steps=1, max_queue_size=10, workers=1, use_multiprocessing=True) else: model.fit_generator(custom_generator(True), steps_per_epoch=STEPS_PER_EPOCH, validation_data=custom_generator(True), validation_steps=1, max_queue_size=10, workers=1, use_multiprocessing=True) # - Produce data on 1 worker thread AT A TIME, consume on main thread: # - Worker threads for training and validation run generator SEQUENTIALLY model.fit_generator(custom_generator(True), steps_per_epoch=STEPS_PER_EPOCH, validation_data=custom_generator(True), validation_steps=1, max_queue_size=10, workers=1, use_multiprocessing=False) # - Produce and consume data without a queue on main thread # - Make sure the value of `use_multiprocessing` is ignored model.fit_generator(custom_generator(True), steps_per_epoch=STEPS_PER_EPOCH, validation_data=custom_generator(True), validation_steps=1, max_queue_size=10, workers=0, use_multiprocessing=True) model.fit_generator(custom_generator(True), steps_per_epoch=STEPS_PER_EPOCH, validation_data=custom_generator(True), validation_steps=1, max_queue_size=10, workers=0, use_multiprocessing=False) # - For Sequence model.fit_generator(DummySequence(), steps_per_epoch=STEPS_PER_EPOCH, validation_data=custom_generator(True), validation_steps=1, max_queue_size=10, workers=0, use_multiprocessing=True) model.fit_generator(DummySequence(), steps_per_epoch=STEPS_PER_EPOCH, validation_data=custom_generator(True), validation_steps=1, max_queue_size=10, workers=0, use_multiprocessing=False) # Test invalid use cases @threadsafe_generator def invalid_generator(): while True: yield arr_data[:10], arr_data[:10], arr_labels[:10], arr_labels[:10] # not specified `validation_steps` with pytest.raises(ValueError): model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, 
validation_data=custom_generator(), validation_steps=None, max_queue_size=10, workers=1, use_multiprocessing=False) # validation data is neither a tuple nor a triple. with pytest.raises(ValueError): model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, validation_data=(arr_data[:10], arr_data[:10], arr_labels[:10], arr_weights[:10]), validation_steps=1, max_queue_size=10, workers=1, use_multiprocessing=False) # validation generator is neither a tuple nor a triple. with pytest.raises(ValueError): model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, validation_data=invalid_generator(), validation_steps=1, max_queue_size=10, workers=1, use_multiprocessing=False) def test_multiprocessing_training_from_file(in_tmpdir): arr_data = np.random.randint(0, 256, (50, 2)) arr_labels = np.random.randint(0, 2, 50) np.savez('data.npz', **{'data': arr_data, 'labels': arr_labels}) @threadsafe_generator def custom_generator(): batch_size = 10 n_samples = 50 arr = np.load('data.npz') while True: batch_index = np.random.randint(0, n_samples - batch_size) start = batch_index end = start + batch_size X = arr['data'][start: end] y = arr['labels'][start: end] yield X, y # Build a NN model = Sequential() model.add(Dense(1, input_shape=(2, ))) model.compile(loss='mse', optimizer='adadelta') # - Produce data on 4 worker processes, consume on main process: # - Each worker process runs OWN copy of generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `fit_generator()` raises ValueError # exception and does not attempt to run the generator. 
if os.name is 'nt': with pytest.raises(ValueError): model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) else: model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) # - Produce data on 4 worker threads, consume on main thread: # - All worker threads share the SAME generator model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=WORKERS, use_multiprocessing=False) # - Produce data on 1 worker process, consume on main process: # - Worker process runs generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `fit_generator()` raises ValueError # exception and does not attempt to run the generator. if os.name is 'nt': with pytest.raises(ValueError): model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=1, use_multiprocessing=True) else: model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=1, use_multiprocessing=True) # - Produce data on 1 worker thread, consume on main thread: # - Worker thread is the only thread running the generator model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=1, use_multiprocessing=False) # - Produce and consume data without a queue on main thread # - Make sure the value of `use_multiprocessing` is ignored model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=0, use_multiprocessing=True) 
model.fit_generator(custom_generator(), steps_per_epoch=STEPS_PER_EPOCH, epochs=1, verbose=1, validation_steps=None, max_queue_size=10, workers=0, use_multiprocessing=False) os.remove('data.npz') def test_multiprocessing_predicting(): arr_data = np.random.randint(0, 256, (50, 2)) @threadsafe_generator def custom_generator(): batch_size = 10 n_samples = 50 while True: batch_index = np.random.randint(0, n_samples - batch_size) start = batch_index end = start + batch_size X = arr_data[start: end] yield X # Build a NN model = Sequential() model.add(Dense(1, input_shape=(2, ))) model.compile(loss='mse', optimizer='adadelta') # - Produce data on 4 worker processes, consume on main process: # - Each worker process runs OWN copy of generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `predict_generator()` raises ValueError # exception and does not attempt to run the generator. if os.name is 'nt': with pytest.raises(ValueError): model.predict_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) else: model.predict_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) # - Produce data on 4 worker threads, consume on main thread: # - All worker threads share the SAME generator model.predict_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=WORKERS, use_multiprocessing=False) # - Produce data on 1 worker process, consume on main process: # - Worker process runs generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `predict_generator()` raises ValueError # exception and does not attempt to run the generator. 
if os.name is 'nt': with pytest.raises(ValueError): model.predict_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=1, use_multiprocessing=True) else: model.predict_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=1, use_multiprocessing=True) # - Produce data on 1 worker thread, consume on main thread: # - Worker thread is the only thread running the generator model.predict_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=1, use_multiprocessing=False) # - Main thread runs the generator without a queue # - Make sure the value of `use_multiprocessing` is ignored model.predict_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=0, use_multiprocessing=True) model.predict_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=0, use_multiprocessing=False) def test_multiprocessing_evaluating(): arr_data = np.random.randint(0, 256, (50, 2)) arr_labels = np.random.randint(0, 2, 50) @threadsafe_generator def custom_generator(): batch_size = 10 n_samples = 50 while True: batch_index = np.random.randint(0, n_samples - batch_size) start = batch_index end = start + batch_size X = arr_data[start: end] y = arr_labels[start: end] yield X, y # Build a NN model = Sequential() model.add(Dense(1, input_shape=(2, ))) model.compile(loss='mse', optimizer='adadelta') # - Produce data on 4 worker processes, consume on main process: # - Each worker process runs OWN copy of generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries # -> make sure `evaluate_generator()` raises raises ValueError # exception and does not attempt to run the generator. 
if os.name is 'nt': with pytest.raises(ValueError): model.evaluate_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) else: model.evaluate_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) # - Produce data on 4 worker threads, consume on main thread: # - All worker threads share the SAME generator model.evaluate_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=WORKERS, use_multiprocessing=False) # - Produce data on 1 worker process, consume on main process: # - Worker process runs generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `evaluate_generator()` raises ValueError # exception and does not attempt to run the generator. if os.name is 'nt': with pytest.raises(ValueError): model.evaluate_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=1, use_multiprocessing=True) else: model.evaluate_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=1, use_multiprocessing=True) # - Produce data on 1 worker thread, consume on main thread: # - Worker thread is the only thread running the generator model.evaluate_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=1, use_multiprocessing=False) # - Produce and consume data without a queue on main thread # - Make sure the value of `use_multiprocessing` is ignored model.evaluate_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=0, use_multiprocessing=True) model.evaluate_generator(custom_generator(), steps=STEPS, max_queue_size=10, workers=0, use_multiprocessing=False) def test_multiprocessing_fit_error(): arr_data = np.random.randint(0, 256, (50, 2)) arr_labels = np.random.randint(0, 2, 50) batch_size = 10 n_samples = 50 good_batches = 3 @threadsafe_generator def custom_generator(use_weights=False): """Raises an exception after a few good batches""" for i in 
range(good_batches): batch_index = np.random.randint(0, n_samples - batch_size) start = batch_index end = start + batch_size X = arr_data[start: end] y = arr_labels[start: end] yield X, y raise RuntimeError model = Sequential() model.add(Dense(1, input_shape=(2, ))) model.compile(loss='mse', optimizer='adadelta') samples = batch_size * (good_batches + 1) # - Produce data on 4 worker processes, consume on main process: # - Each worker process runs OWN copy of generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `fit_generator()` raises ValueError # exception and does not attempt to run the generator. # - On other platforms, make sure `RuntimeError` exception bubbles up if os.name is 'nt': with pytest.raises(RuntimeError): model.fit_generator(custom_generator(), steps_per_epoch=samples, validation_steps=None, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) else: with pytest.raises(RuntimeError): model.fit_generator(custom_generator(), steps_per_epoch=samples, validation_steps=None, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) # - Produce data on 4 worker threads, consume on main thread: # - All worker threads share the SAME generator # - Make sure `RuntimeError` exception bubbles up with pytest.raises(RuntimeError): model.fit_generator(custom_generator(), steps_per_epoch=samples, validation_steps=None, max_queue_size=10, workers=WORKERS, use_multiprocessing=False) # - Produce data on 1 worker process, consume on main process: # - Worker process runs generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `fit_generator()` raises ValueError # exception and does not attempt to run the generator. 
# - On other platforms, make sure `RuntimeError` exception bubbles up if os.name is 'nt': with pytest.raises(RuntimeError): model.fit_generator(custom_generator(), steps_per_epoch=samples, validation_steps=None, max_queue_size=10, workers=1, use_multiprocessing=True) else: with pytest.raises(RuntimeError): model.fit_generator(custom_generator(), steps_per_epoch=samples, validation_steps=None, max_queue_size=10, workers=1, use_multiprocessing=True) # - Produce data on 1 worker thread, consume on main thread: # - Worker thread is the only thread running the generator # - Make sure `RuntimeError` exception bubbles up with pytest.raises(RuntimeError): model.fit_generator(custom_generator(), steps_per_epoch=samples, validation_steps=None, max_queue_size=10, workers=1, use_multiprocessing=False) # - Produce and consume data without a queue on main thread # - Make sure the value of `use_multiprocessing` is ignored # - Make sure `RuntimeError` exception bubbles up with pytest.raises(RuntimeError): model.fit_generator(custom_generator(), steps_per_epoch=samples, validation_steps=None, max_queue_size=10, workers=0, use_multiprocessing=True) with pytest.raises(RuntimeError): model.fit_generator(custom_generator(), steps_per_epoch=samples, validation_steps=None, max_queue_size=10, workers=0, use_multiprocessing=False) def test_multiprocessing_evaluate_error(): arr_data = np.random.randint(0, 256, (50, 2)) arr_labels = np.random.randint(0, 2, 50) batch_size = 10 n_samples = 50 good_batches = 3 @threadsafe_generator def custom_generator(): """Raises an exception after a few good batches""" for i in range(good_batches): batch_index = np.random.randint(0, n_samples - batch_size) start = batch_index end = start + batch_size X = arr_data[start: end] y = arr_labels[start: end] yield X, y raise RuntimeError model = Sequential() model.add(Dense(1, input_shape=(2, ))) model.compile(loss='mse', optimizer='adadelta') # - Produce data on 4 worker processes, consume on main process: # - 
Each worker process runs OWN copy of generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `evaluate_generator()` raises ValueError # exception and does not attempt to run the generator. # - On other platforms, make sure `RuntimeError` exception bubbles up if os.name is 'nt': with pytest.raises(ValueError): model.evaluate_generator(custom_generator(), steps=good_batches * WORKERS + 1, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) else: with pytest.raises(RuntimeError): model.evaluate_generator(custom_generator(), steps=good_batches * WORKERS + 1, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) # - Produce data on 4 worker threads, consume on main thread: # - All worker threads share the SAME generator # - Make sure `RuntimeError` exception bubbles up with pytest.raises(RuntimeError): model.evaluate_generator(custom_generator(), steps=good_batches * WORKERS + 1, max_queue_size=10, workers=WORKERS, use_multiprocessing=False) # - Produce data on 1 worker process, consume on main process: # - Worker process runs generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `evaluate_generator()` raises ValueError # exception and does not attempt to run the generator. 
# - On other platforms, make sure `RuntimeError` exception bubbles up if os.name is 'nt': with pytest.raises(RuntimeError): model.evaluate_generator(custom_generator(), steps=good_batches + 1, max_queue_size=10, workers=1, use_multiprocessing=True) else: with pytest.raises(RuntimeError): model.evaluate_generator(custom_generator(), steps=good_batches + 1, max_queue_size=10, workers=1, use_multiprocessing=True) # - Produce data on 1 worker thread, consume on main thread: # - Worker thread is the only thread running the generator # - Make sure `RuntimeError` exception bubbles up with pytest.raises(RuntimeError): model.evaluate_generator(custom_generator(), steps=good_batches + 1, max_queue_size=10, workers=1, use_multiprocessing=False) # - Produce and consume data without a queue on main thread # - Make sure the value of `use_multiprocessing` is ignored # - Make sure `RuntimeError` exception bubbles up with pytest.raises(RuntimeError): model.evaluate_generator(custom_generator(), steps=good_batches + 1, max_queue_size=10, workers=0, use_multiprocessing=True) with pytest.raises(RuntimeError): model.evaluate_generator(custom_generator(), steps=good_batches + 1, max_queue_size=10, workers=0, use_multiprocessing=False) def test_multiprocessing_predict_error(): arr_data = np.random.randint(0, 256, (50, 2)) good_batches = 3 @threadsafe_generator def custom_generator(): """Raises an exception after a few good batches""" batch_size = 10 n_samples = 50 for i in range(good_batches): batch_index = np.random.randint(0, n_samples - batch_size) start = batch_index end = start + batch_size X = arr_data[start: end] yield X raise RuntimeError model = Sequential() model.add(Dense(1, input_shape=(2, ))) model.compile(loss='mse', optimizer='adadelta') # - Produce data on 4 worker processes, consume on main process: # - Each worker process runs OWN copy of generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure 
`predict_generator()` raises ValueError # exception and does not attempt to run the generator. # - On other platforms, make sure `RuntimeError` exception bubbles up if os.name is 'nt': with pytest.raises(StopIteration): model.predict_generator(custom_generator(), steps=good_batches * WORKERS + 1, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) else: with pytest.raises(RuntimeError): model.predict_generator(custom_generator(), steps=good_batches * WORKERS + 1, max_queue_size=10, workers=WORKERS, use_multiprocessing=True) # - Produce data on 4 worker threads, consume on main thread: # - All worker threads share the SAME generator # - Make sure `RuntimeError` exception bubbles up with pytest.raises(RuntimeError): model.predict_generator(custom_generator(), steps=good_batches * WORKERS + 1, max_queue_size=10, workers=WORKERS, use_multiprocessing=False) # - Produce data on 1 worker process, consume on main process: # - Worker process runs generator # - BUT on Windows, `multiprocessing` won't marshall generators across # process boundaries -> make sure `predict_generator()` raises ValueError # exception and does not attempt to run the generator. 
# - On other platforms, make sure `RuntimeError` exception bubbles up if os.name is 'nt': with pytest.raises(RuntimeError): model.predict_generator(custom_generator(), steps=good_batches + 1, max_queue_size=10, workers=1, use_multiprocessing=True) else: with pytest.raises(RuntimeError): model.predict_generator(custom_generator(), steps=good_batches + 1, max_queue_size=10, workers=1, use_multiprocessing=True) # - Produce data on 1 worker thread, consume on main thread: # - Worker thread is the only thread running the generator # - Make sure `RuntimeError` exception bubbles up with pytest.raises(RuntimeError): model.predict_generator(custom_generator(), steps=good_batches + 1, max_queue_size=10, workers=1, use_multiprocessing=False) # - Produce and consume data without a queue on main thread # - Make sure the value of `use_multiprocessing` is ignored # - Make sure `RuntimeError` exception bubbles up with pytest.raises(RuntimeError): model.predict_generator(custom_generator(), steps=good_batches + 1, max_queue_size=10, workers=0, use_multiprocessing=True) with pytest.raises(RuntimeError): model.predict_generator(custom_generator(), steps=good_batches + 1, max_queue_size=10, workers=0, use_multiprocessing=False) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/test_doc_auto_generation.py0000644000000000116100000003243513305602621021111 0ustar rooteng00000000000000from docs import autogen import pytest test_doc1 = { 'doc': """Base class for recurrent layers. # Arguments cell: A RNN cell instance. A RNN cell is a class that has: - a `call(input_at_t, states_at_t)` method, returning `(output_at_t, states_at_t_plus_1)`. The call method of the cell can also take the optional argument `constants`, see section "Note on passing external constants" below. - a `state_size` attribute. This can be a single integer (single state) in which case it is the size of the recurrent state (which should be the same as the size of the cell output). 
This can also be a list/tuple of integers (one size per state). In this case, the first entry (`state_size[0]`) should be the same as the size of the cell output. It is also possible for `cell` to be a list of RNN cell instances, in which cases the cells get stacked on after the other in the RNN, implementing an efficient stacked RNN. return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence. stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used. Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences. input_dim: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model. input_length: Length of input sequences, to be specified when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers upstream (without it, the shape of the dense outputs cannot be computed). Note that if the recurrent layer is not the first layer in your model, you would need to specify the input length at the level of the first layer (e.g. via the `input_shape` argument) # Input shape 3D tensor with shape `(batch_size, timesteps, input_dim)`. # Output shape - if `return_state`: a list of tensors. The first tensor is the output. The remaining tensors are the last states, each with shape `(batch_size, units)`. - if `return_sequences`: 3D tensor with shape `(batch_size, timesteps, units)`. 
- else, 2D tensor with shape `(batch_size, units)`. # Masking This layer supports masking for input data with a variable number of timesteps. To introduce masks to your data, use an [Embedding](embeddings.md) layer with the `mask_zero` parameter set to `True`. # Note on using statefulness in RNNs You can set RNN layers to be 'stateful', which means that the states computed for the samples in one batch will be reused as initial states for the samples in the next batch. This assumes a one-to-one mapping between samples in different successive batches. To enable statefulness: - specify `stateful=True` in the layer constructor. - specify a fixed batch size for your model, by passing if sequential model: `batch_input_shape=(...)` to the first layer in your model. else for functional model with 1 or more Input layers: `batch_shape=(...)` to all the first layers in your model. This is the expected shape of your inputs *including the batch size*. It should be a tuple of integers, e.g. `(32, 10, 100)`. - specify `shuffle=False` when calling fit(). To reset the states of your model, call `.reset_states()` on either a specific layer, or on your entire model. # Note on specifying the initial state of RNNs Note: that One: You can specify the initial state of RNN layers symbolically by calling them with the keyword argument `initial_state`. Two: The value of `initial_state` should be a tensor or list of tensors representing the initial state of the RNN layer. You can specify the initial state of RNN layers numerically by: One: calling `reset_states` - With the keyword argument `states`. - The value of `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. # Note on passing external constants to RNNs You can pass "external" constants to the cell using the `constants` keyword: argument of `RNN.__call__` (as well as `RNN.call`) method. This: requires that the `cell.call` method accepts the same keyword argument `constants`. 
Such constants can be used to condition the cell transformation on additional static inputs (not changing over time), a.k.a. an attention mechanism. # Examples ```python # First, let's define a RNN Cell, as a layer subclass. class MinimalRNNCell(keras.layers.Layer): def __init__(self, units, **kwargs): self.units = units self.state_size = units super(MinimalRNNCell, self).__init__(**kwargs) def build(self, input_shape): self.kernel = self.add_weight(shape=(input_shape[-1], self.units), initializer='uniform', name='kernel') self.recurrent_kernel = self.add_weight( shape=(self.units, self.units), initializer='uniform', name='recurrent_kernel') self.built = True def call(self, inputs, states): prev_output = states[0] h = K.dot(inputs, self.kernel) output = h + K.dot(prev_output, self.recurrent_kernel) return output, [output] # Let's use this cell in a RNN layer: cell = MinimalRNNCell(32) x = keras.Input((None, 5)) layer = RNN(cell) y = layer(x) # Here's how to use the cell to build a stacked RNN: cells = [MinimalRNNCell(32), MinimalRNNCell(64)] x = keras.Input((None, 5)) layer = RNN(cells) y = layer(x) ``` """, 'result': '''Base class for recurrent layers. __Arguments__ - __cell__: A RNN cell instance. A RNN cell is a class that has: - a `call(input_at_t, states_at_t)` method, returning `(output_at_t, states_at_t_plus_1)`. The call method of the cell can also take the optional argument `constants`, see section "Note on passing external constants" below. - a `state_size` attribute. This can be a single integer (single state) in which case it is the size of the recurrent state (which should be the same as the size of the cell output). This can also be a list/tuple of integers (one size per state). In this case, the first entry (`state_size[0]`) should be the same as the size of the cell output. It is also possible for `cell` to be a list of RNN cell instances, in which cases the cells get stacked on after the other in the RNN, implementing an efficient stacked RNN. 
- __return_sequences__: Boolean. Whether to return the last output in the output sequence, or the full sequence. - __return_state__: Boolean. Whether to return the last state in addition to the output. - __go_backwards__: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence. - __stateful__: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. - __unroll__: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used. Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences. - __input_dim__: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model. - __input_length__: Length of input sequences, to be specified when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers upstream (without it, the shape of the dense outputs cannot be computed). Note that if the recurrent layer is not the first layer in your model, you would need to specify the input length at the level of the first layer (e.g. via the `input_shape` argument) __Input shape__ 3D tensor with shape `(batch_size, timesteps, input_dim)`. __Output shape__ - if `return_state`: a list of tensors. The first tensor is the output. The remaining tensors are the last states, each with shape `(batch_size, units)`. - if `return_sequences`: 3D tensor with shape `(batch_size, timesteps, units)`. - else, 2D tensor with shape `(batch_size, units)`. __Masking__ This layer supports masking for input data with a variable number of timesteps. To introduce masks to your data, use an [Embedding](embeddings.md) layer with the `mask_zero` parameter set to `True`. 
__Note on using statefulness in RNNs__ You can set RNN layers to be 'stateful', which means that the states computed for the samples in one batch will be reused as initial states for the samples in the next batch. This assumes a one-to-one mapping between samples in different successive batches. To enable statefulness: - specify `stateful=True` in the layer constructor. - specify a fixed batch size for your model, by passing if sequential model: `batch_input_shape=(...)` to the first layer in your model. else for functional model with 1 or more Input layers: `batch_shape=(...)` to all the first layers in your model. This is the expected shape of your inputs *including the batch size*. It should be a tuple of integers, e.g. `(32, 10, 100)`. - specify `shuffle=False` when calling fit(). To reset the states of your model, call `.reset_states()` on either a specific layer, or on your entire model. __Note on specifying the initial state of RNNs__ Note: that - __One__: You can specify the initial state of RNN layers symbolically by calling them with the keyword argument `initial_state`. - __Two__: The value of `initial_state` should be a tensor or list of tensors representing the initial state of the RNN layer. You can specify the initial state of RNN layers numerically by: - __One__: calling `reset_states` - With the keyword argument `states`. - The value of `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. __Note on passing external constants to RNNs__ You can pass "external" constants to the cell using the `constants` - __keyword__: argument of `RNN.__call__` (as well as `RNN.call`) method. - __This__: requires that the `cell.call` method accepts the same keyword argument `constants`. Such constants can be used to condition the cell transformation on additional static inputs (not changing over time), a.k.a. an attention mechanism. __Examples__ ```python # First, let's define a RNN Cell, as a layer subclass. 
class MinimalRNNCell(keras.layers.Layer): def __init__(self, units, **kwargs): self.units = units self.state_size = units super(MinimalRNNCell, self).__init__(**kwargs) def build(self, input_shape): self.kernel = self.add_weight(shape=(input_shape[-1], self.units), initializer='uniform', name='kernel') self.recurrent_kernel = self.add_weight( shape=(self.units, self.units), initializer='uniform', name='recurrent_kernel') self.built = True def call(self, inputs, states): prev_output = states[0] h = K.dot(inputs, self.kernel) output = h + K.dot(prev_output, self.recurrent_kernel) return output, [output] # Let's use this cell in a RNN layer: cell = MinimalRNNCell(32) x = keras.Input((None, 5)) layer = RNN(cell) y = layer(x) # Here's how to use the cell to build a stacked RNN: cells = [MinimalRNNCell(32), MinimalRNNCell(64)] x = keras.Input((None, 5)) layer = RNN(cells) y = layer(x) ``` '''} def test_doc_lists(): docstring = autogen.process_docstring(test_doc1['doc']) assert docstring == test_doc1['result'] if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/test_model_pickling.py0000644000000000116100000001030213354530144020052 0ustar rooteng00000000000000import pytest import os import sys import tempfile import numpy as np from numpy.testing import assert_allclose from numpy.testing import assert_raises from keras import backend as K from keras.models import Model, Sequential from keras.layers import Dense, Lambda, RepeatVector, TimeDistributed from keras.layers import Input from keras import optimizers from keras import losses from keras import metrics if sys.version_info[0] == 3: import pickle else: import cPickle as pickle skipif_no_tf_gpu = pytest.mark.skipif( (K.backend() != 'tensorflow') or (not K.tensorflow_backend._get_available_gpus()), reason='Requires TensorFlow backend and a GPU') def test_sequential_model_pickling(): model = Sequential() model.add(Dense(2, input_shape=(3,))) model.add(RepeatVector(3)) model.add(TimeDistributed(Dense(3))) 
model.compile(loss=losses.MSE, optimizer=optimizers.RMSprop(lr=0.0001), metrics=[metrics.categorical_accuracy], sample_weight_mode='temporal') x = np.random.random((1, 3)) y = np.random.random((1, 3, 3)) model.train_on_batch(x, y) out = model.predict(x) state = pickle.dumps(model) new_model = pickle.loads(state) out2 = new_model.predict(x) assert_allclose(out, out2, atol=1e-05) # test that new updates are the same with both models x = np.random.random((1, 3)) y = np.random.random((1, 3, 3)) model.train_on_batch(x, y) new_model.train_on_batch(x, y) out = model.predict(x) out2 = new_model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_sequential_model_pickling_2(): # test with custom optimizer, loss custom_opt = optimizers.rmsprop custom_loss = losses.mse model = Sequential() model.add(Dense(2, input_shape=(3,))) model.add(Dense(3)) model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) x = np.random.random((1, 3)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) state = pickle.dumps(model) model = pickle.loads(state) out2 = model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_functional_model_pickling(): inputs = Input(shape=(3,)) x = Dense(2)(inputs) outputs = Dense(3)(x) model = Model(inputs, outputs) model.compile(loss=losses.MSE, optimizer=optimizers.Adam(), metrics=[metrics.categorical_accuracy]) x = np.random.random((1, 3)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) state = pickle.dumps(model) model = pickle.loads(state) out2 = model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_pickling_multiple_metrics_outputs(): inputs = Input(shape=(5,)) x = Dense(5)(inputs) output1 = Dense(1, name='output1')(x) output2 = Dense(1, name='output2')(x) model = Model(inputs=inputs, outputs=[output1, output2]) metrics = {'output1': ['mse', 'binary_accuracy'], 'output2': ['mse', 'binary_accuracy'] } loss = {'output1': 'mse', 'output2': 'mse'} 
model.compile(loss=loss, optimizer='sgd', metrics=metrics) # assure that model is working x = np.array([[1, 1, 1, 1, 1]]) out = model.predict(x) model = pickle.loads(pickle.dumps(model)) out2 = model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_pickling_without_compilation(): """Test pickling model without compiling. """ model = Sequential() model.add(Dense(2, input_shape=(3,))) model.add(Dense(3)) model = pickle.loads(pickle.dumps(model)) def test_pickling_right_after_compilation(): model = Sequential() model.add(Dense(2, input_shape=(3,))) model.add(Dense(3)) model.compile(loss='mse', optimizer='sgd', metrics=['acc']) model._make_train_function() model = pickle.loads(pickle.dumps(model)) def test_pickling_unused_layers_is_ok(): a = Input(shape=(256, 512, 6)) b = Input(shape=(256, 512, 1)) c = Lambda(lambda x: x[:, :, :, :1])(a) model = Model(inputs=[a, b], outputs=c) model = pickle.loads(pickle.dumps(model)) if __name__ == '__main__': pytest.main([__file__]) Keras-2.2.4/tests/test_model_saving.py0000644000000000116100000006531213355226611017556 0ustar rooteng00000000000000import pytest import os import h5py import tempfile import numpy as np from numpy.testing import assert_allclose from numpy.testing import assert_raises from keras import backend as K from keras.engine.saving import preprocess_weights_for_loading from keras.models import Model, Sequential from keras.layers import Dense, Lambda, RepeatVector, TimeDistributed from keras.layers import Bidirectional, GRU, LSTM, CuDNNGRU, CuDNNLSTM from keras.layers import Conv2D, Flatten from keras.layers import Input, InputLayer from keras.initializers import Constant from keras import optimizers from keras import losses from keras import metrics from keras.models import save_model, load_model skipif_no_tf_gpu = pytest.mark.skipif( (K.backend() != 'tensorflow' or not K.tensorflow_backend._get_available_gpus()), reason='Requires TensorFlow backend and a GPU') def test_sequential_model_saving(): model = 
Sequential() model.add(Dense(2, input_shape=(3,))) model.add(RepeatVector(3)) model.add(TimeDistributed(Dense(3))) model.compile(loss=losses.MSE, optimizer=optimizers.RMSprop(lr=0.0001), metrics=[metrics.categorical_accuracy], sample_weight_mode='temporal') x = np.random.random((1, 3)) y = np.random.random((1, 3, 3)) model.train_on_batch(x, y) out = model.predict(x) _, fname = tempfile.mkstemp('.h5') save_model(model, fname) new_model = load_model(fname) os.remove(fname) out2 = new_model.predict(x) assert_allclose(out, out2, atol=1e-05) # test that new updates are the same with both models x = np.random.random((1, 3)) y = np.random.random((1, 3, 3)) model.train_on_batch(x, y) new_model.train_on_batch(x, y) out = model.predict(x) out2 = new_model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_sequential_model_saving_2(): # test with custom optimizer, loss custom_opt = optimizers.rmsprop custom_loss = losses.mse model = Sequential() model.add(Dense(2, input_shape=(3,))) model.add(Dense(3)) model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) x = np.random.random((1, 3)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) _, fname = tempfile.mkstemp('.h5') save_model(model, fname) model = load_model(fname, custom_objects={'custom_opt': custom_opt, 'custom_loss': custom_loss}) os.remove(fname) out2 = model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_functional_model_saving(): inputs = Input(shape=(3,)) x = Dense(2)(inputs) outputs = Dense(3)(x) model = Model(inputs, outputs) model.compile(loss=losses.MSE, optimizer=optimizers.Adam(), metrics=[metrics.categorical_accuracy]) x = np.random.random((1, 3)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) _, fname = tempfile.mkstemp('.h5') save_model(model, fname) model = load_model(fname) os.remove(fname) out2 = model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_model_saving_to_pre_created_h5py_file(): 
inputs = Input(shape=(3,)) x = Dense(2)(inputs) outputs = Dense(3)(x) model = Model(inputs, outputs) model.compile(loss=losses.MSE, optimizer=optimizers.Adam(), metrics=[metrics.categorical_accuracy]) x = np.random.random((1, 3)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) _, fname = tempfile.mkstemp('.h5') with h5py.File(fname, mode='r+') as h5file: save_model(model, h5file) loaded_model = load_model(h5file) out2 = loaded_model.predict(x) assert_allclose(out, out2, atol=1e-05) # test non-default options in h5 with h5py.File('does not matter', driver='core', backing_store=False) as h5file: save_model(model, h5file) loaded_model = load_model(h5file) out2 = loaded_model.predict(x) assert_allclose(out, out2, atol=1e-05) with h5py.File(fname, mode='r+') as h5file: g = h5file.create_group('model') save_model(model, g) loaded_model = load_model(g) out2 = loaded_model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_model_saving_to_binary_stream(): inputs = Input(shape=(3,)) x = Dense(2)(inputs) outputs = Dense(3)(x) model = Model(inputs, outputs) model.compile(loss=losses.MSE, optimizer=optimizers.Adam(), metrics=[metrics.categorical_accuracy]) x = np.random.random((1, 3)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) _, fname = tempfile.mkstemp('.h5') with h5py.File(fname, mode='r+') as h5file: save_model(model, h5file) loaded_model = load_model(h5file) out2 = loaded_model.predict(x) assert_allclose(out, out2, atol=1e-05) # Save the model to an in-memory-only h5 file. with h5py.File('does not matter', driver='core', backing_store=False) as h5file: save_model(model, h5file) h5file.flush() # Very important! Otherwise you get all zeroes below. binary_data = h5file.fid.get_file_image() # Make sure the binary data is correct by saving it to a file manually # and then loading it the usual way. 
with open(fname, 'wb') as raw_file: raw_file.write(binary_data) # Load the manually-saved binary data, and make sure the model is intact. with h5py.File(fname, mode='r') as h5file: loaded_model = load_model(h5file) out2 = loaded_model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_saving_multiple_metrics_outputs(): inputs = Input(shape=(5,)) x = Dense(5)(inputs) output1 = Dense(1, name='output1')(x) output2 = Dense(1, name='output2')(x) model = Model(inputs=inputs, outputs=[output1, output2]) metrics = {'output1': ['mse', 'binary_accuracy'], 'output2': ['mse', 'binary_accuracy'] } loss = {'output1': 'mse', 'output2': 'mse'} model.compile(loss=loss, optimizer='sgd', metrics=metrics) # assure that model is working x = np.array([[1, 1, 1, 1, 1]]) out = model.predict(x) _, fname = tempfile.mkstemp('.h5') save_model(model, fname) model = load_model(fname) os.remove(fname) out2 = model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_saving_without_compilation(): """Test saving model without compiling. 
""" model = Sequential() model.add(Dense(2, input_shape=(3,))) model.add(Dense(3)) _, fname = tempfile.mkstemp('.h5') save_model(model, fname) model = load_model(fname) os.remove(fname) def test_saving_right_after_compilation(): model = Sequential() model.add(Dense(2, input_shape=(3,))) model.add(Dense(3)) model.compile(loss='mse', optimizer='sgd', metrics=['acc']) model._make_train_function() _, fname = tempfile.mkstemp('.h5') save_model(model, fname) model = load_model(fname) os.remove(fname) def test_saving_unused_layers_is_ok(): a = Input(shape=(256, 512, 6)) b = Input(shape=(256, 512, 1)) c = Lambda(lambda x: x[:, :, :, :1])(a) model = Model(inputs=[a, b], outputs=c) _, fname = tempfile.mkstemp('.h5') save_model(model, fname) load_model(fname) os.remove(fname) def test_loading_weights_by_name_and_reshape(): """ test loading model weights by name on: - sequential model """ # test with custom optimizer, loss custom_opt = optimizers.rmsprop custom_loss = losses.mse # sequential model model = Sequential() model.add(Conv2D(2, (1, 1), input_shape=(1, 1, 1), name='rick')) model.add(Flatten()) model.add(Dense(3, name='morty')) model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) x = np.random.random((1, 1, 1, 1)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) old_weights = [layer.get_weights() for layer in model.layers] _, fname = tempfile.mkstemp('.h5') model.save_weights(fname) # delete and recreate model del(model) model = Sequential() model.add(Conv2D(2, (1, 1), input_shape=(1, 1, 1), name='rick')) model.add(Conv2D(3, (1, 1), name='morty')) model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) # load weights from first model with pytest.raises(ValueError): model.load_weights(fname, by_name=True, reshape=False) with pytest.raises(ValueError): model.load_weights(fname, by_name=False, reshape=False) model.load_weights(fname, by_name=False, reshape=True) model.load_weights(fname, by_name=True, 
reshape=True) out2 = model.predict(x) assert_allclose(np.squeeze(out), np.squeeze(out2), atol=1e-05) for i in range(len(model.layers)): new_weights = model.layers[i].get_weights() for j in range(len(new_weights)): # only compare layers that have weights, skipping Flatten() if old_weights[i]: assert_allclose(old_weights[i][j], new_weights[j], atol=1e-05) # delete and recreate model with `use_bias=False` del(model) model = Sequential() model.add(Conv2D(2, (1, 1), input_shape=(1, 1, 1), use_bias=False, name='rick')) model.add(Flatten()) model.add(Dense(3, name='morty')) with pytest.raises(ValueError, match=r'.* expects [0-9]+ .* but the saved .* [0-9]+ .*'): model.load_weights(fname) with pytest.raises(ValueError, match=r'.* expects [0-9]+ .* but the saved .* [0-9]+ .*'): model.load_weights(fname, by_name=True) with pytest.warns(UserWarning, match=r'Skipping loading .* due to mismatch .*'): model.load_weights(fname, by_name=True, skip_mismatch=True) # delete and recreate model with `filters=10` del(model) model = Sequential() model.add(Conv2D(10, (1, 1), input_shape=(1, 1, 1), name='rick')) with pytest.raises(ValueError, match=r'.* has shape .* but the saved .* shape .*'): model.load_weights(fname, by_name=True) with pytest.raises(ValueError, match=r'.* load .* [0-9]+ layers into .* [0-9]+ layers.'): model.load_weights(fname) os.remove(fname) def test_loading_weights_by_name_2(): """ test loading model weights by name on: - both sequential and functional api models - different architecture with shared names """ # test with custom optimizer, loss custom_opt = optimizers.rmsprop custom_loss = losses.mse # sequential model model = Sequential() model.add(Dense(2, input_shape=(3,), name='rick')) model.add(Dense(3, name='morty')) model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) x = np.random.random((1, 3)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) old_weights = [layer.get_weights() for layer in model.layers] _, 
fname = tempfile.mkstemp('.h5') model.save_weights(fname) # delete and recreate model using Functional API del(model) data = Input(shape=(3,)) rick = Dense(2, name='rick')(data) jerry = Dense(3, name='jerry')(rick) # add 2 layers (but maintain shapes) jessica = Dense(2, name='jessica')(jerry) morty = Dense(3, name='morty')(jessica) model = Model(inputs=[data], outputs=[morty]) model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) # load weights from first model model.load_weights(fname, by_name=True) os.remove(fname) out2 = model.predict(x) assert np.max(np.abs(out - out2)) > 1e-05 rick = model.layers[1].get_weights() jerry = model.layers[2].get_weights() jessica = model.layers[3].get_weights() morty = model.layers[4].get_weights() assert_allclose(old_weights[0][0], rick[0], atol=1e-05) assert_allclose(old_weights[0][1], rick[1], atol=1e-05) assert_allclose(old_weights[1][0], morty[0], atol=1e-05) assert_allclose(old_weights[1][1], morty[1], atol=1e-05) assert_allclose(np.zeros_like(jerry[1]), jerry[1]) # biases init to 0 assert_allclose(np.zeros_like(jessica[1]), jessica[1]) # biases init to 0 def test_loading_weights_by_name_skip_mismatch(): """ test skipping layers while loading model weights by name on: - sequential model """ # test with custom optimizer, loss custom_opt = optimizers.rmsprop custom_loss = losses.mse # sequential model model = Sequential() model.add(Dense(2, input_shape=(3,), name='rick')) model.add(Dense(3, name='morty')) model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) x = np.random.random((1, 3)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) old_weights = [layer.get_weights() for layer in model.layers] _, fname = tempfile.mkstemp('.h5') model.save_weights(fname) # delete and recreate model del(model) model = Sequential() model.add(Dense(2, input_shape=(3,), name='rick')) model.add(Dense(4, name='morty')) # different shape w.r.t. 
previous model model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) # load weights from first model with pytest.warns(UserWarning): # expect UserWarning for skipping weights model.load_weights(fname, by_name=True, skip_mismatch=True) os.remove(fname) # assert layers 'rick' are equal for old, new in zip(old_weights[0], model.layers[0].get_weights()): assert_allclose(old, new, atol=1e-05) # assert layers 'morty' are not equal, since we skipped loading this layer for old, new in zip(old_weights[1], model.layers[1].get_weights()): assert_raises(AssertionError, assert_allclose, old, new, atol=1e-05) # a function to be called from the Lambda layer def square_fn(x): return x * x def test_saving_lambda_custom_objects(): inputs = Input(shape=(3,)) x = Lambda(lambda x: square_fn(x), output_shape=(3,))(inputs) outputs = Dense(3)(x) model = Model(inputs, outputs) model.compile(loss=losses.MSE, optimizer=optimizers.RMSprop(lr=0.0001), metrics=[metrics.categorical_accuracy]) x = np.random.random((1, 3)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) _, fname = tempfile.mkstemp('.h5') save_model(model, fname) model = load_model(fname, custom_objects={'square_fn': square_fn}) os.remove(fname) out2 = model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_saving_lambda_numpy_array_arguments(): mean = np.random.random((4, 2, 3)) std = np.abs(np.random.random((4, 2, 3))) + 1e-5 inputs = Input(shape=(4, 2, 3)) outputs = Lambda(lambda image, mu, std: (image - mu) / std, arguments={'mu': mean, 'std': std})(inputs) model = Model(inputs, outputs) model.compile(loss='mse', optimizer='sgd', metrics=['acc']) _, fname = tempfile.mkstemp('.h5') save_model(model, fname) model = load_model(fname) os.remove(fname) assert_allclose(mean, model.layers[1].arguments['mu']) assert_allclose(std, model.layers[1].arguments['std']) def test_saving_custom_activation_function(): x = Input(shape=(3,)) output = Dense(3, activation=K.cos)(x) model = 
def test_saving_model_with_long_layer_names():
    """A model with a very long layer name can be saved and reloaded."""
    # This layer name will make the `layers_name` HDF5 attribute blow
    # out of proportion. Note that it fits into the internal HDF5
    # attribute memory limit on its own but because h5py converts
    # the list of layer names into numpy array, which uses the same
    # amount of memory for every item, it increases the memory
    # requirements substantially.
    x = Input(shape=(2,), name='input_' + ('x' * (2**15)))
    f = x
    for i in range(4):
        f = Dense(2, name='dense_%d' % (i,))(f)

    model = Model(inputs=[x], outputs=[f])

    model.compile(loss='mse', optimizer='adam', metrics=['acc'])

    x = np.random.random((1, 2))
    y = np.random.random((1, 2))
    model.train_on_batch(x, y)

    out = model.predict(x)

    # mkstemp returns an open descriptor plus the path; only the path is
    # used, so close the descriptor instead of leaking it.
    fd, fname = tempfile.mkstemp('.h5')
    os.close(fd)
    save_model(model, fname)

    model = load_model(fname)

    # Check that the HDF5 file contains a chunked array
    # of layer names.
    with h5py.File(fname, 'r') as h5file:
        n_layer_names_arrays = len([attr for attr in h5file['model_weights'].attrs
                                    if attr.startswith('layer_names')])

    os.remove(fname)

    # The chunking of layer names array should have happened.
    assert n_layer_names_arrays > 0

    out2 = model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
def test_saving_recurrent_layer_with_init_state():
    """An LSTM called with an explicit `initial_state` can be saved and loaded.

    This is a smoke test: it passes when `load_model` does not raise.
    """
    vector_size = 8
    input_length = 20

    input_initial_state = Input(shape=(vector_size,))
    input_x = Input(shape=(input_length, vector_size))

    lstm = LSTM(vector_size, return_sequences=True)(
        input_x, initial_state=[input_initial_state, input_initial_state])
    model = Model(inputs=[input_x, input_initial_state], outputs=[lstm])

    # mkstemp returns an open descriptor plus the path; only the path is
    # used, so close the descriptor instead of leaking it.
    fd, fname = tempfile.mkstemp('.h5')
    os.close(fd)
    model.save(fname)

    # The loaded model itself is not inspected, so it is not bound to a name.
    load_model(fname)
    os.remove(fname)


def test_saving_recurrent_layer_without_bias():
    """An LSTM built with `use_bias=False` can be saved and loaded.

    This is a smoke test: it passes when `load_model` does not raise.
    """
    vector_size = 8
    input_length = 20

    input_x = Input(shape=(input_length, vector_size))
    lstm = LSTM(vector_size, use_bias=False)(input_x)
    model = Model(inputs=[input_x], outputs=[lstm])

    # Close the descriptor mkstemp opened; only the path is needed.
    fd, fname = tempfile.mkstemp('.h5')
    os.close(fd)
    model.save(fname)

    load_model(fname)
    os.remove(fname)
np.random.random((1, 2)) _, fname = tempfile.mkstemp('.h5') for _ in range(3): model.train_on_batch(x, y) save_model(model, fname, overwrite=True) out = model.predict(x) new_model = load_model(fname) os.remove(fname) out2 = new_model.predict(x) assert_allclose(out, out2, atol=1e-05) def test_saving_constant_initializer_with_numpy(): """Test saving and loading model of constant initializer with numpy inputs. """ model = Sequential() model.add(Dense(2, input_shape=(3,), kernel_initializer=Constant(np.ones((3, 2))))) model.add(Dense(3)) model.compile(loss='mse', optimizer='sgd', metrics=['acc']) _, fname = tempfile.mkstemp('.h5') save_model(model, fname) model = load_model(fname) os.remove(fname) @pytest.mark.parametrize('implementation', [1, 2], ids=['impl1', 'impl2']) @pytest.mark.parametrize('bidirectional', [False, True], ids=['single', 'bidirectional']) @pytest.mark.parametrize('to_cudnn', [False, True], ids=['from_cudnn', 'to_cudnn']) @pytest.mark.parametrize('rnn_type', ['LSTM', 'GRU'], ids=['LSTM', 'GRU']) @pytest.mark.parametrize('model_nest_level', [1, 2], ids=['model_plain', 'model_nested']) @pytest.mark.parametrize('model_type', ['func', 'seq'], ids=['model_func', 'model_seq']) @skipif_no_tf_gpu def test_load_weights_between_noncudnn_rnn(rnn_type, to_cudnn, bidirectional, implementation, model_nest_level, model_type): input_size = 10 timesteps = 6 input_shape = (timesteps, input_size) units = 2 num_samples = 32 inputs = np.random.random((num_samples, timesteps, input_size)) rnn_layer_kwargs = { 'recurrent_activation': 'sigmoid', # ensure biases are non-zero and properly converted 'bias_initializer': 'random_uniform', 'implementation': implementation } if rnn_type == 'LSTM': rnn_layer_class = LSTM cudnn_rnn_layer_class = CuDNNLSTM else: rnn_layer_class = GRU cudnn_rnn_layer_class = CuDNNGRU rnn_layer_kwargs['reset_after'] = True layer = rnn_layer_class(units, **rnn_layer_kwargs) if bidirectional: layer = Bidirectional(layer) cudnn_layer = 
def _make_nested_model(input_shape, layer, level=1, model_type='func'):
    """Wrap `layer` in `level` nested models of the requested kind.

    # Arguments
        input_shape: shape passed to `Input` / `InputLayer`.
        layer: the layer (or model) to wrap.
        level: number of nested models to build around `layer`.
        model_type: `'func'` for functional `Model`s,
            `'seq'` for `Sequential` models.

    # Returns
        The outermost model.

    # Raises
        ValueError: if `model_type` is neither `'func'` nor `'seq'`.
    """
    # example: make_nested_seq_model((1,), Dense(10), level=2).summary()
    def make_nested_seq_model(input_shape, layer, level=1):
        model = layer
        for i in range(1, level + 1):
            # only the innermost Sequential gets the explicit input layer
            layers = [InputLayer(input_shape), model] if (i == 1) else [model]
            model = Sequential(layers)
        return model

    # example: make_nested_func_model((1,), Dense(10), level=2).summary()
    def make_nested_func_model(input_shape, layer, level=1):
        # named `input_tensor` so the builtin `input` is not shadowed
        input_tensor = Input(input_shape)
        model = layer
        for _ in range(level):
            model = Model(input_tensor, model(input_tensor))
        return model

    if model_type == 'func':
        return make_nested_func_model(input_shape, layer, level)
    if model_type == 'seq':
        return make_nested_seq_model(input_shape, layer, level)
    # Previously an unknown type fell through and returned None, which only
    # surfaced later as an AttributeError on the "model".
    raise ValueError('Unknown model_type: %r (expected "func" or "seq")'
                     % (model_type,))


def _convert_model_weights(source_model, target_model):
    """Copy weights from `source_model` to `target_model` via a temp HDF5 file."""
    # mkstemp returns an open descriptor plus the path; only the path is
    # used, so close the descriptor instead of leaking it.
    fd, fname = tempfile.mkstemp('.h5')
    os.close(fd)
    try:
        source_model.save_weights(fname)
        target_model.load_weights(fname)
    finally:
        # remove the file even when saving or loading raises
        os.remove(fname)


@pytest.mark.parametrize('to_cudnn', [False, True], ids=['from_cudnn', 'to_cudnn'])
@pytest.mark.parametrize('rnn_type', ['LSTM', 'GRU'], ids=['LSTM', 'GRU'])
@skipif_no_tf_gpu
def test_load_weights_between_noncudnn_rnn_time_distributed(rnn_type, to_cudnn):
    """
    Similar test as test_load_weights_between_noncudnn_rnn() but has different
    rank of input due to usage of TimeDistributed. Issue: #10356.
    """
    input_size = 10
    steps = 6
    timesteps = 6
    input_shape = (timesteps, steps, input_size)
    units = 2
    num_samples = 32
    inputs = np.random.random((num_samples,) + input_shape)

    rnn_layer_kwargs = {
        'recurrent_activation': 'sigmoid',
        # ensure biases are non-zero and properly converted
        'bias_initializer': 'random_uniform',
    }
    if rnn_type == 'LSTM':
        rnn_layer_class = LSTM
        cudnn_rnn_layer_class = CuDNNLSTM
    else:
        rnn_layer_class = GRU
        cudnn_rnn_layer_class = CuDNNGRU
        rnn_layer_kwargs['reset_after'] = True

    layer = rnn_layer_class(units, **rnn_layer_kwargs)
    layer = TimeDistributed(layer)

    cudnn_layer = cudnn_rnn_layer_class(units)
    cudnn_layer = TimeDistributed(cudnn_layer)

    model = _make_nested_model(input_shape, layer)
    cudnn_model = _make_nested_model(input_shape, cudnn_layer)

    # copy the weights in the requested direction, then both models must agree
    if to_cudnn:
        _convert_model_weights(model, cudnn_model)
    else:
        _convert_model_weights(cudnn_model, model)

    assert_allclose(model.predict(inputs), cudnn_model.predict(inputs), atol=1e-4)


@skipif_no_tf_gpu
def test_preprocess_weights_for_loading_gru_incompatible():
    """
    Loading weights between incompatible layers should fail fast with an exception.
    """
    def gru(cudnn=False, **kwargs):
        layer_class = CuDNNGRU if cudnn else GRU
        return layer_class(2, input_shape=[3, 5], **kwargs)

    def initialize_weights(layer):
        # A model is needed to initialize weights.
        _ = Sequential([layer])
        return layer

    def assert_not_compatible(src, dest, message):
        with pytest.raises(ValueError) as ex:
            preprocess_weights_for_loading(dest, initialize_weights(src).get_weights())
        # Exceptions have no `.message` attribute on Python 3, so the old
        # `ex.value.message` raised AttributeError instead of checking the
        # text. `str()` of the exception works on Python 2 and 3 alike.
        assert message in str(ex.value)

    assert_not_compatible(gru(), gru(cudnn=True),
                          'GRU(reset_after=False) is not compatible with CuDNNGRU')
    assert_not_compatible(gru(cudnn=True), gru(),
                          'CuDNNGRU is not compatible with GRU(reset_after=False)')
    assert_not_compatible(gru(), gru(reset_after=True),
                          'GRU(reset_after=False) is not compatible with '
                          'GRU(reset_after=True)')
    assert_not_compatible(gru(reset_after=True), gru(),
                          'GRU(reset_after=True) is not compatible with '
                          'GRU(reset_after=False)')
README.md include CONTRIBUTING.md graft docs graft examples graft tests Keras-2.2.4/docs/0000755000000000116100000000000013355226624013261 5ustar rooteng00000000000000Keras-2.2.4/docs/mkdocs.yml0000644000000000116100000000357113227311000015247 0ustar rooteng00000000000000site_name: Keras Documentation theme: readthedocs # theme_dir: theme docs_dir: sources repo_url: http://github.com/keras-team/keras site_url: http://keras.io/ site_description: 'Documentation for Keras, the Python Deep Learning library.' dev_addr: '0.0.0.0:8000' google_analytics: ['UA-61785484-1', 'keras.io'] pages: - Home: index.md - Why use Keras: why-use-keras.md - Getting started: - Guide to the Sequential model: getting-started/sequential-model-guide.md - Guide to the Functional API: getting-started/functional-api-guide.md - FAQ: getting-started/faq.md - Models: - About Keras models: models/about-keras-models.md - Sequential: models/sequential.md - Model (functional API): models/model.md - Layers: - About Keras layers: layers/about-keras-layers.md - Core Layers: layers/core.md - Convolutional Layers: layers/convolutional.md - Pooling Layers: layers/pooling.md - Locally-connected Layers: layers/local.md - Recurrent Layers: layers/recurrent.md - Embedding Layers: layers/embeddings.md - Merge Layers: layers/merge.md - Advanced Activations Layers: layers/advanced-activations.md - Normalization Layers: layers/normalization.md - Noise layers: layers/noise.md - Layer wrappers: layers/wrappers.md - Writing your own Keras layers: layers/writing-your-own-keras-layers.md - Preprocessing: - Sequence Preprocessing: preprocessing/sequence.md - Text Preprocessing: preprocessing/text.md - Image Preprocessing: preprocessing/image.md - Losses: losses.md - Metrics: metrics.md - Optimizers: optimizers.md - Activations: activations.md - Callbacks: callbacks.md - Datasets: datasets.md - Applications: applications.md - Backend: backend.md - Initializers: initializers.md - Regularizers: regularizers.md - Constraints: 
constraints.md - Visualization: visualization.md - Scikit-learn API: scikit-learn-api.md - Utils: utils.md - Contributing: contributing.md Keras-2.2.4/docs/__init__.py0000644000000000116100000000000013305602621015346 0ustar rooteng00000000000000Keras-2.2.4/docs/autogen.py0000644000000000116100000005243513355226611015302 0ustar rooteng00000000000000# -*- coding: utf-8 -*- ''' General documentation architecture: Home Index - Getting started Getting started with the sequential model Getting started with the functional api FAQ - Models About Keras models explain when one should use Sequential or functional API explain compilation step explain weight saving, weight loading explain serialization, deserialization Sequential Model (functional API) - Layers About Keras layers explain common layer functions: get_weights, set_weights, get_config explain input_shape explain usage on non-Keras tensors Core Layers Convolutional Layers Pooling Layers Locally-connected Layers Recurrent Layers Embedding Layers Merge Layers Advanced Activations Layers Normalization Layers Noise Layers Layer Wrappers Writing your own Keras layers - Preprocessing Sequence Preprocessing Text Preprocessing Image Preprocessing Losses Metrics Optimizers Activations Callbacks Datasets Applications Backend Initializers Regularizers Constraints Visualization Scikit-learn API Utils Contributing ''' from __future__ import print_function from __future__ import unicode_literals import re import inspect import os import shutil import keras from keras import utils from keras import layers from keras.layers import advanced_activations from keras.layers import noise from keras.layers import wrappers from keras import initializers from keras import optimizers from keras import callbacks from keras import models from keras import losses from keras import metrics from keras import backend from keras import activations from keras import preprocessing import sys if sys.version[0] == '2': reload(sys) 
sys.setdefaultencoding('utf8') EXCLUDE = { 'Optimizer', 'TFOptimizer', 'Wrapper', 'get_session', 'set_session', 'CallbackList', 'serialize', 'deserialize', 'get', 'set_image_dim_ordering', 'normalize_data_format', 'image_dim_ordering', 'get_variable_shape', } # For each class to document, it is possible to: # 1) Document only the class: [classA, classB, ...] # 2) Document all its methods: [classA, (classB, "*")] # 3) Choose which methods to document (methods listed as strings): # [classA, (classB, ["method1", "method2", ...]), ...] # 4) Choose which methods to document (methods listed as qualified names): # [classA, (classB, [module.classB.method1, module.classB.method2, ...]), ...] PAGES = [ { 'page': 'models/sequential.md', 'methods': [ models.Sequential.compile, models.Sequential.fit, models.Sequential.evaluate, models.Sequential.predict, models.Sequential.train_on_batch, models.Sequential.test_on_batch, models.Sequential.predict_on_batch, models.Sequential.fit_generator, models.Sequential.evaluate_generator, models.Sequential.predict_generator, models.Sequential.get_layer, ], }, { 'page': 'models/model.md', 'methods': [ models.Model.compile, models.Model.fit, models.Model.evaluate, models.Model.predict, models.Model.train_on_batch, models.Model.test_on_batch, models.Model.predict_on_batch, models.Model.fit_generator, models.Model.evaluate_generator, models.Model.predict_generator, models.Model.get_layer, ] }, { 'page': 'layers/core.md', 'classes': [ layers.Dense, layers.Activation, layers.Dropout, layers.Flatten, layers.Input, layers.Reshape, layers.Permute, layers.RepeatVector, layers.Lambda, layers.ActivityRegularization, layers.Masking, layers.SpatialDropout1D, layers.SpatialDropout2D, layers.SpatialDropout3D, ], }, { 'page': 'layers/convolutional.md', 'classes': [ layers.Conv1D, layers.Conv2D, layers.SeparableConv1D, layers.SeparableConv2D, layers.Conv2DTranspose, layers.Conv3D, layers.Conv3DTranspose, layers.Cropping1D, layers.Cropping2D, 
layers.Cropping3D, layers.UpSampling1D, layers.UpSampling2D, layers.UpSampling3D, layers.ZeroPadding1D, layers.ZeroPadding2D, layers.ZeroPadding3D, ], }, { 'page': 'layers/pooling.md', 'classes': [ layers.MaxPooling1D, layers.MaxPooling2D, layers.MaxPooling3D, layers.AveragePooling1D, layers.AveragePooling2D, layers.AveragePooling3D, layers.GlobalMaxPooling1D, layers.GlobalAveragePooling1D, layers.GlobalMaxPooling2D, layers.GlobalAveragePooling2D, layers.GlobalMaxPooling3D, layers.GlobalAveragePooling3D, ], }, { 'page': 'layers/local.md', 'classes': [ layers.LocallyConnected1D, layers.LocallyConnected2D, ], }, { 'page': 'layers/recurrent.md', 'classes': [ layers.RNN, layers.SimpleRNN, layers.GRU, layers.LSTM, layers.ConvLSTM2D, layers.SimpleRNNCell, layers.GRUCell, layers.LSTMCell, layers.CuDNNGRU, layers.CuDNNLSTM, ], }, { 'page': 'layers/embeddings.md', 'classes': [ layers.Embedding, ], }, { 'page': 'layers/normalization.md', 'classes': [ layers.BatchNormalization, ], }, { 'page': 'layers/advanced-activations.md', 'all_module_classes': [advanced_activations], }, { 'page': 'layers/noise.md', 'all_module_classes': [noise], }, { 'page': 'layers/merge.md', 'classes': [ layers.Add, layers.Subtract, layers.Multiply, layers.Average, layers.Maximum, layers.Concatenate, layers.Dot, ], 'functions': [ layers.add, layers.subtract, layers.multiply, layers.average, layers.maximum, layers.concatenate, layers.dot, ] }, { 'page': 'preprocessing/sequence.md', 'functions': [ preprocessing.sequence.pad_sequences, preprocessing.sequence.skipgrams, preprocessing.sequence.make_sampling_table, ], 'classes': [ preprocessing.sequence.TimeseriesGenerator, ] }, { 'page': 'preprocessing/image.md', 'classes': [ (preprocessing.image.ImageDataGenerator, '*') ] }, { 'page': 'preprocessing/text.md', 'functions': [ preprocessing.text.hashing_trick, preprocessing.text.one_hot, preprocessing.text.text_to_word_sequence, ], 'classes': [ preprocessing.text.Tokenizer, ] }, { 'page': 
def _get_argspec(function):
    """Return the argument spec of `function` on both Python 2 and 3.

    `inspect.getargspec` is deprecated on Python 3, rejects functions with
    keyword-only arguments or annotations, and is removed in Python 3.11.
    `inspect.getfullargspec` exposes the same `.args` / `.defaults`
    attributes, so it is preferred whenever it exists.
    """
    try:
        getter = inspect.getfullargspec
    except AttributeError:
        # Python 2 only ships `getargspec`.
        getter = inspect.getargspec
    return getter(function)


def get_function_signature(function, method=True):
    """Render the call signature of a function or method as a string.

    # Arguments
        function: the callable to describe. If it carries an
            `_original_function` attribute, that object's arguments
            are used instead of the wrapper's.
        method: if `True`, the first positional argument is dropped
            from the rendered signature.

    # Returns
        A string such as `keras.layers.Dense(units, activation=None)`,
        after `post_process_signature` has been applied.
    """
    wrapped = getattr(function, '_original_function', None)
    signature = _get_argspec(function if wrapped is None else wrapped)
    defaults = signature.defaults
    if method:
        args = signature.args[1:]
    else:
        args = signature.args
    if defaults:
        # Defaults align with the *last* len(defaults) arguments. The pairs
        # are materialised so the list can be iterated and then tested for
        # emptiness (a bare `zip` object is always truthy on Python 3).
        kwargs = list(zip(args[-len(defaults):], defaults))
        args = args[:-len(defaults)]
    else:
        kwargs = []
    st = '%s.%s(' % (clean_module_name(function.__module__), function.__name__)

    for a in args:
        st += str(a) + ', '
    for a, v in kwargs:
        if isinstance(v, str):
            # show string defaults with their quotes
            v = '\'' + v + '\''
        st += str(a) + '=' + str(v) + ', '
    if kwargs or args:
        # drop the trailing ', ' before closing the parenthesis
        signature = st[:-2] + ')'
    else:
        signature = st + ')'
    return post_process_signature(signature)


def get_class_signature(cls):
    """Render the constructor signature of `cls` as a string.

    # Arguments
        cls: the class to describe.

    # Returns
        The signature of `cls.__init__` with `__init__` replaced by the
        class name, or `<module>.<ClassName>()` when that signature
        cannot be obtained.
    """
    try:
        class_signature = get_function_signature(cls.__init__)
        class_signature = class_signature.replace('__init__', cls.__name__)
    except (TypeError, AttributeError):
        # in case the class inherits from object and does not
        # define __init__
        class_signature = "{clean_module_name}.{cls_name}()".format(
            clean_module_name=clean_module_name(cls.__module__),
            cls_name=cls.__name__
        )
    return post_process_signature(class_signature)
def post_process_signature(signature):
    """Shorten a dotted signature to its public `keras.<package>` path.

    For signatures whose second component is `layers`, `utils` or
    `backend`, the third component (the private submodule) is dropped,
    e.g. `keras.layers.core.Dense(...)` becomes `keras.layers.Dense(...)`.

    # Arguments
        signature: String, a dotted call signature.

    # Returns
        The signature string, shortened when the rule above applies.
    """
    # Split on dots that are not followed by a digit, so that float
    # defaults such as `0.5` are kept in one piece.
    parts = re.split(r'\.(?!\d)', signature)
    if len(parts) >= 4 and parts[1] in ('layers', 'utils', 'backend'):
        signature = 'keras.' + parts[1] + '.' + '.'.join(parts[3:])
    return signature


def clean_module_name(name):
    """Map an external package module name onto the `keras.` namespace.

    # Arguments
        name: String, a module name (e.g. `cls.__module__`).

    # Returns
        The module name, with a leading `keras_applications` or
        `keras_preprocessing` rewritten to `keras.applications` or
        `keras.preprocessing`.

    # Raises
        AssertionError: if the resulting name does not start with `keras.`.
    """
    if name.startswith('keras_applications'):
        name = name.replace('keras_applications', 'keras.applications')
    if name.startswith('keras_preprocessing'):
        name = name.replace('keras_preprocessing', 'keras.preprocessing')
    assert name[:6] == 'keras.', 'Invalid module name: %s' % name
    return name


def class_to_docs_link(cls):
    """Build the documentation URL (under `ROOT`) for a class.

    # Arguments
        cls: A class whose module lives in the `keras.` namespace.

    # Returns
        String, `ROOT` + module path + `#` + lowercased class name.
    """
    module_name = clean_module_name(cls.__module__)
    # Drop the leading 'keras.' prefix.
    module_name = module_name[6:]
    link = ROOT + module_name.replace('.', '/') + '#' + cls.__name__.lower()
    return link


def class_to_source_link(cls):
    """Build a Markdown `[[source]]` link to the class on GitHub.

    # Arguments
        cls: A class whose source can be located by `inspect`.

    # Returns
        String, a Markdown link pointing at the line where the class starts.
    """
    module_name = clean_module_name(cls.__module__)
    path = module_name.replace('.', '/')
    path += '.py'
    # `getsourcelines` returns (lines, first_line_number).
    line = inspect.getsourcelines(cls)[-1]
    link = ('https://github.com/keras-team/'
            'keras/blob/master/' + path + '#L' + str(line))
    return '[[source]](' + link + ')'


def code_snippet(snippet):
    """Wrap `snippet` in a fenced Markdown Python code block."""
    result = '```python\n'
    result += snippet + '\n'
    result += '```\n'
    return result


def count_leading_spaces(s):
    """Return the index of the first non-whitespace character of `s`.

    Returns 0 when `s` is empty or contains only whitespace.
    """
    ws = re.search(r'\S', s)
    if ws:
        return ws.start()
    else:
        return 0


def process_list_block(docstring, starting_point, leading_spaces, marker):
    """Extract one docstring section body and turn it into a Markdown list.

    The section body (from `starting_point` up to the next blank line) is
    replaced in `docstring` by `marker` and returned separately, formatted,
    so the caller can reinject it later.

    # Arguments
        docstring: String, the docstring being processed.
        starting_point: Integer, index in `docstring` where the body starts.
        leading_spaces: Integer, indentation of the section title; that many
            spaces are removed from the start of every body line.
        marker: String, placeholder inserted in place of the body.

    # Returns
        A tuple `(docstring, block)`: the docstring with the placeholder,
        and the formatted body.
    """
    ending_point = docstring.find('\n\n', starting_point)
    # The slice stops one character before the blank line; that character
    # is not lost, it stays in `docstring` right after the marker.
    block = docstring[starting_point:(None if ending_point == -1 else
                                      ending_point - 1)]
    # Place marker for later reinjection.
    docstring = docstring.replace(block, marker)
    lines = block.split('\n')
    # Remove the computed number of leading white spaces from each line.
    lines = [re.sub('^' + ' ' * leading_spaces, '', line) for line in lines]
    # Usually lines have at least 4 additional leading spaces.
    # These have to be removed, but first the list roots have to be detected.
    # The `{4}` quantifier spells out the four-space indent explicitly.
    top_level_regex = r'^ {4}([^\s\\\(]+):(.*)'
    top_level_replacement = r'- __\1__:\2'
    lines = [re.sub(top_level_regex, top_level_replacement, line)
             for line in lines]
    # All the other lines get simply the 4 leading space (if present) removed
    lines = [re.sub(r'^ {4}', '', line) for line in lines]
    # Fix text lines after lists
    indent = 0
    text_block = False
    for i, line in enumerate(lines):
        spaces = re.search(r'\S', line)
        if spaces:
            # If it is a list element
            if line[spaces.start()] == '-':
                indent = spaces.start() + 1
                if text_block:
                    # A list item right after plain text: separate them.
                    text_block = False
                    lines[i] = '\n' + line
            elif spaces.start() < indent:
                # Less indented than the current list: plain text begins.
                text_block = True
                indent = spaces.start()
                lines[i] = '\n' + line
        else:
            # Blank line: reset the state.
            text_block = False
            indent = 0
    block = '\n'.join(lines)
    return docstring, block


def process_docstring(docstring):
    """Convert a Keras-style docstring to Markdown.

    Fenced code blocks are extracted and de-indented, `# Section` bodies
    are turned into lists via `process_list_block`, section titles are
    rendered bold, remaining leading spaces are stripped, and finally the
    list blocks and code blocks are reinjected.

    # Arguments
        docstring: String, the raw docstring.

    # Returns
        String, the Markdown-formatted docstring.
    """
    # First, extract code blocks and process them.
    code_blocks = []
    if '```' in docstring:
        tmp = docstring[:]
        while '```' in tmp:
            tmp = tmp[tmp.find('```'):]
            # Index just past the closing fence (3 chars opening fence
            # skipped for the search + 3 chars closing fence).
            index = tmp[3:].find('```') + 6
            snippet = tmp[:index]
            # Place marker in docstring for later reinjection.
            docstring = docstring.replace(
                snippet, '$CODE_BLOCK_%d' % len(code_blocks))
            snippet_lines = snippet.split('\n')
            # Remove leading spaces.
            num_leading_spaces = snippet_lines[-1].find('`')
            snippet_lines = ([snippet_lines[0]] +
                             [line[num_leading_spaces:]
                              for line in snippet_lines[1:]])
            # Most code snippets have 3 or 4 more leading spaces
            # on inner lines, but not all. Remove them.
            inner_lines = snippet_lines[1:-1]
            leading_spaces = None
            for line in inner_lines:
                if not line or line[0] == '\n':
                    continue
                spaces = count_leading_spaces(line)
                if leading_spaces is None:
                    leading_spaces = spaces
                if spaces < leading_spaces:
                    leading_spaces = spaces
            if leading_spaces:
                snippet_lines = ([snippet_lines[0]] +
                                 [line[leading_spaces:]
                                  for line in snippet_lines[1:-1]] +
                                 [snippet_lines[-1]])
            snippet = '\n'.join(snippet_lines)
            code_blocks.append(snippet)
            tmp = tmp[index:]

    # Format docstring lists.
    section_regex = r'\n( +)# (.*)\n'
    section_idx = re.search(section_regex, docstring)
    shift = 0
    sections = {}
    while section_idx and section_idx.group(2):
        anchor = section_idx.group(2)
        leading_spaces = len(section_idx.group(1))
        # `shift` tracks the absolute position of the section body, since
        # each search below runs on `docstring[shift:]`.
        shift += section_idx.end()
        marker = '$' + anchor.replace(' ', '_') + '$'
        docstring, content = process_list_block(docstring,
                                                shift,
                                                leading_spaces,
                                                marker)
        sections[marker] = content
        section_idx = re.search(section_regex, docstring[shift:])

    # Format docstring section titles.
    docstring = re.sub(r'\n(\s+)# (.*)\n',
                       r'\n\1__\2__\n\n',
                       docstring)

    # Strip all remaining leading spaces.
    lines = docstring.split('\n')
    docstring = '\n'.join([line.lstrip(' ') for line in lines])

    # Reinject list blocks.
    for marker, content in sections.items():
        docstring = docstring.replace(marker, content)

    # Reinject code blocks, highest index first: `$CODE_BLOCK_1` is a
    # prefix of `$CODE_BLOCK_10`, so ascending order would corrupt
    # docstrings holding more than ten code blocks.
    for i in reversed(range(len(code_blocks))):
        docstring = docstring.replace(
            '$CODE_BLOCK_%d' % i, code_blocks[i])
    return docstring


# Start from a clean `sources` directory on every run.
print('Cleaning up existing sources directory.')
if os.path.exists('sources'):
    shutil.rmtree('sources')

# Mirror the `templates` tree into `sources`, copying Markdown files only.
print('Populating sources directory with templates.')
for subdir, dirs, fnames in os.walk('templates'):
    for fname in fnames:
        new_subdir = subdir.replace('templates', 'sources')
        if not os.path.exists(new_subdir):
            os.makedirs(new_subdir)
        if fname.endswith('.md'):
            fpath = os.path.join(subdir, fname)
            new_fpath = fpath.replace('templates', 'sources')
            shutil.copy(fpath, new_fpath)


def read_file(path):
    """Return the whole content of the text file at `path`."""
    with open(path) as f:
        return f.read()


def collect_class_methods(cls, methods):
    """Resolve the methods of `cls` that should be documented.

    # Arguments
        cls: The class being documented.
        methods: Either a list/tuple of methods (strings are looked up on
            `cls`, other items are kept as-is), or any other value, in
            which case all routines of `cls` whose name neither starts
            with `_` nor is listed in `EXCLUDE` are collected.

    # Returns
        A list of method objects.
    """
    if isinstance(methods, (list, tuple)):
        return [getattr(cls, m) if isinstance(m, str) else m for m in methods]
    methods = []
    for _, method in inspect.getmembers(cls, predicate=inspect.isroutine):
        if method.__name__[0] == '_' or method.__name__ in EXCLUDE:
            continue
        methods.append(method)
    return methods


def render_function(function, method=True):
    """Render the Markdown documentation of a function or method.

    # Arguments
        function: The function or method object to document.
        method: Boolean, forwarded to `get_function_signature`; when true
            the module prefix is also removed from the signature.

    # Returns
        String: a `###` title, the signature as a code snippet and, when
        present, the processed docstring.
    """
    subblocks = []
    signature = get_function_signature(function, method=method)
    if method:
        signature = signature.replace(
            clean_module_name(function.__module__) + '.', '')
    subblocks.append('### ' + function.__name__ + '\n')
    subblocks.append(code_snippet(signature))
    docstring = function.__doc__
    if docstring:
        subblocks.append(process_docstring(docstring))
    return '\n\n'.join(subblocks)


def read_page_data(page_data, type):
    """Collect the objects of one kind to document on a page.

    Explicit entries (`page_data[type]`) come first, followed by the
    members discovered in every module listed under
    `page_data['all_module_<type>']`.

    # Arguments
        page_data: Dict, one entry of `PAGES`.
        type: String, one of `'classes'`, `'functions'`, `'methods'`.

    # Returns
        A new list of objects; `page_data` is left unmodified.
    """
    assert type in ['classes', 'functions', 'methods']
    # Copy, so that extending the result never mutates `page_data`.
    data = list(page_data.get(type, []))
    for module in page_data.get('all_module_{}'.format(type), []):
        module_data = []
        for name in dir(module):
            if name[0] == '_' or name in EXCLUDE:
                continue
            module_member = getattr(module, name)
            if (inspect.isclass(module_member) and type == 'classes' or
               inspect.isfunction(module_member) and type == 'functions'):
                instance = module_member
                # Keep only members defined in (a submodule of) `module`.
                if module.__name__ in instance.__module__:
                    if instance not in module_data:
                        module_data.append(instance)
        # NOTE(review): ordering by `id` follows object identity, so the
        # resulting order may differ between runs -- confirm this is intended.
        module_data.sort(key=id)
        data += module_data
    return data


if __name__ == '__main__':
    # The index page is the template with the README (from its first
    # `##` heading onwards) injected.
    readme = read_file('../README.md')
    index = read_file('templates/index.md')
    index = index.replace('{{autogenerated}}', readme[readme.find('##'):])
    with open('sources/index.md', 'w') as f:
        f.write(index)

    print('Generating docs for Keras %s.' % keras.__version__)
    for page_data in PAGES:
        classes = read_page_data(page_data, 'classes')

        blocks = []
        for element in classes:
            # Normalize to a (class, methods) pair.
            if not isinstance(element, (list, tuple)):
                element = (element, [])
            cls = element[0]
            subblocks = []
            signature = get_class_signature(cls)
            subblocks.append('' +
                             class_to_source_link(cls) + '')
            if element[1]:
                subblocks.append('## ' + cls.__name__ + ' class\n')
            else:
                subblocks.append('### ' + cls.__name__ + '\n')
            subblocks.append(code_snippet(signature))
            docstring = cls.__doc__
            if docstring:
                subblocks.append(process_docstring(docstring))
            methods = collect_class_methods(cls, element[1])
            if methods:
                subblocks.append('\n---')
                subblocks.append('## ' + cls.__name__ + ' methods\n')
                subblocks.append('\n---\n'.join(
                    [render_function(method, method=True)
                     for method in methods]))
            blocks.append('\n'.join(subblocks))

        methods = read_page_data(page_data, 'methods')

        for method in methods:
            blocks.append(render_function(method, method=True))

        functions = read_page_data(page_data, 'functions')

        for function in functions:
            blocks.append(render_function(function, method=False))

        if not blocks:
            raise RuntimeError('Found no content for page ' +
                               page_data['page'])

        mkdown = '\n----\n\n'.join(blocks)
        # save module page.
# Either insert content into existing page, # or create page otherwise page_name = page_data['page'] path = os.path.join('sources', page_name) if os.path.exists(path): template = read_file(path) assert '{{autogenerated}}' in template, ('Template found for ' + path + ' but missing {{autogenerated}}' ' tag.') mkdown = template.replace('{{autogenerated}}', mkdown) print('...inserting autogenerated content into template:', path) else: print('...creating new page with autogenerated content:', path) subdir = os.path.dirname(path) if not os.path.exists(subdir): os.makedirs(subdir) with open(path, 'w') as f: f.write(mkdown) shutil.copyfile('../CONTRIBUTING.md', 'sources/contributing.md') Keras-2.2.4/docs/README.md0000644000000000116100000000073313146670577014553 0ustar rooteng00000000000000# Keras Documentation The source for Keras documentation is in this directory under `sources/`. Our documentation uses extended Markdown, as implemented by [MkDocs](http://mkdocs.org). ## Building the documentation - install MkDocs: `pip install mkdocs` - `cd` to the `docs/` folder and run: - `python autogen.py` - `mkdocs serve` # Starts a local webserver: [localhost:8000](localhost:8000) - `mkdocs build` # Builds a static site in "site" directory Keras-2.2.4/docs/templates/0000755000000000116100000000000013355226624015257 5ustar rooteng00000000000000Keras-2.2.4/docs/templates/callbacks.md0000644000000000116100000000427213146670577017535 0ustar rooteng00000000000000## Usage of callbacks A callback is a set of functions to be applied at given stages of the training procedure. You can use callbacks to get a view on internal states and statistics of the model during training. You can pass a list of callbacks (as the keyword argument `callbacks`) to the `.fit()` method of the `Sequential` or `Model` classes. The relevant methods of the callbacks will then be called at each stage of the training. 
--- {{autogenerated}} --- # Create a callback You can create a custom callback by extending the base class `keras.callbacks.Callback`. A callback has access to its associated model through the class property `self.model`. Here's a simple example saving a list of losses over each batch during training: ```python class LossHistory(keras.callbacks.Callback): def on_train_begin(self, logs={}): self.losses = [] def on_batch_end(self, batch, logs={}): self.losses.append(logs.get('loss')) ``` --- ### Example: recording loss history ```python class LossHistory(keras.callbacks.Callback): def on_train_begin(self, logs={}): self.losses = [] def on_batch_end(self, batch, logs={}): self.losses.append(logs.get('loss')) model = Sequential() model.add(Dense(10, input_dim=784, kernel_initializer='uniform')) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') history = LossHistory() model.fit(x_train, y_train, batch_size=128, epochs=20, verbose=0, callbacks=[history]) print(history.losses) # outputs ''' [0.66047596406559383, 0.3547245744908703, ..., 0.25953155204159617, 0.25901699725311789] ''' ``` --- ### Example: model checkpoints ```python from keras.callbacks import ModelCheckpoint model = Sequential() model.add(Dense(10, input_dim=784, kernel_initializer='uniform')) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') ''' saves the model weights after each epoch if the validation loss decreased ''' checkpointer = ModelCheckpoint(filepath='/tmp/weights.hdf5', verbose=1, save_best_only=True) model.fit(x_train, y_train, batch_size=128, epochs=20, verbose=0, validation_data=(X_test, Y_test), callbacks=[checkpointer]) ``` Keras-2.2.4/docs/templates/why-use-keras.md0000644000000000116100000001360613354530144020305 0ustar rooteng00000000000000# Why use Keras? There are countless deep learning frameworks available today. Why use Keras rather than any other? 
Here are some of the areas in which Keras compares favorably to existing alternatives. --- ## Keras prioritizes developer experience - Keras is an API designed for human beings, not machines. [Keras follows best practices for reducing cognitive load](https://blog.keras.io/user-experience-design-for-apis.html): it offers consistent & simple APIs, it minimizes the number of user actions required for common use cases, and it provides clear and actionable feedback upon user error. - This makes Keras easy to learn and easy to use. As a Keras user, you are more productive, allowing you to try more ideas than your competition, faster -- which in turn [helps you win machine learning competitions](https://www.quora.com/Why-has-Keras-been-so-successful-lately-at-Kaggle-competitions). - This ease of use does not come at the cost of reduced flexibility: because Keras integrates with lower-level deep learning languages (in particular TensorFlow), it enables you to implement anything you could have built in the base language. In particular, as `tf.keras`, the Keras API integrates seamlessly with your TensorFlow workflows. --- ## Keras has broad adoption in the industry and the research community

Deep learning frameworks ranking computed by Jeff Hale, based on 11 data sources across 7 categories With over 250,000 individual users as of mid-2018, Keras has stronger adoption in both the industry and the research community than any other deep learning framework except TensorFlow itself (and the Keras API is the official frontend of TensorFlow, via the `tf.keras` module). You are already constantly interacting with features built with Keras -- it is in use at Netflix, Uber, Yelp, Instacart, Zocdoc, Square, and many others. It is especially popular among startups that place deep learning at the core of their products. Keras is also a favorite among deep learning researchers, coming in #2 in terms of mentions in scientific papers uploaded to the preprint server [arXiv.org](https://arxiv.org/archive/cs). Keras has also been adopted by researchers at large scientific organizations, in particular CERN and NASA. --- ## Keras makes it easy to turn models into products Your Keras models can be easily deployed across a greater range of platforms than any other deep learning framework: - On iOS, via [Apple’s CoreML](https://developer.apple.com/documentation/coreml) (Keras support officially provided by Apple). Here's [a tutorial](https://www.pyimagesearch.com/2018/04/23/running-keras-models-on-ios-with-coreml/). - On Android, via the TensorFlow Android runtime. Example: [Not Hotdog app](https://medium.com/@timanglade/how-hbos-silicon-valley-built-not-hotdog-with-mobile-tensorflow-keras-react-native-ef03260747f3). - In the browser, via GPU-accelerated JavaScript runtimes such as [Keras.js](https://transcranial.github.io/keras-js/#/) and [WebDNN](https://mil-tokyo.github.io/webdnn/). - On Google Cloud, via [TensorFlow-Serving](https://www.tensorflow.org/serving/). - [In a Python webapp backend (such as a Flask app)](https://blog.keras.io/building-a-simple-keras-deep-learning-rest-api.html). 
- On the JVM, via [DL4J model import provided by SkyMind](https://deeplearning4j.org/model-import-keras). - On Raspberry Pi. --- ## Keras supports multiple backend engines and does not lock you into one ecosystem Your Keras models can be developed with a range of different [deep learning backends](https://keras.io/backend/). Importantly, any Keras model that only leverages built-in layers will be portable across all these backends: you can train a model with one backend, and load it with another (e.g. for deployment). Available backends include: - The TensorFlow backend (from Google) - The CNTK backend (from Microsoft) - The Theano backend Amazon is also currently working on developing a MXNet backend for Keras. As such, your Keras model can be trained on a number of different hardware platforms beyond CPUs: - [NVIDIA GPUs](https://developer.nvidia.com/deep-learning) - [Google TPUs](https://cloud.google.com/tpu/), via the TensorFlow backend and Google Cloud - OpenCL-enabled GPUs, such as those from AMD, via [the PlaidML Keras backend](https://github.com/plaidml/plaidml) --- ## Keras has strong multi-GPU support and distributed training support - Keras has [built-in support for multi-GPU data parallelism](/utils/#multi_gpu_model) - [Horovod](https://github.com/uber/horovod), from Uber, has first-class support for Keras models - Keras models [can be turned into TensorFlow Estimators](https://www.tensorflow.org/versions/master/api_docs/python/tf/keras/estimator/model_to_estimator) and trained on [clusters of GPUs on Google Cloud](https://cloud.google.com/solutions/running-distributed-tensorflow-on-compute-engine) - Keras can be run on Spark via [Dist-Keras](https://github.com/cerndb/dist-keras) (from CERN) and [Elephas](https://github.com/maxpumperla/elephas) --- ## Keras development is backed by key companies in the deep learning ecosystem Keras development is backed primarily by Google, and the Keras API comes packaged in TensorFlow as `tf.keras`. 
Additionally, Microsoft maintains the CNTK Keras backend. Amazon AWS is developing MXNet support. Other contributing companies include NVIDIA, Uber, and Apple (with CoreML). Keras-2.2.4/docs/templates/scikit-learn-api.md0000644000000000116100000000406113210317577020734 0ustar rooteng00000000000000# Wrappers for the Scikit-Learn API You can use `Sequential` Keras models (single-input only) as part of your Scikit-Learn workflow via the wrappers found at `keras.wrappers.scikit_learn.py`. There are two wrappers available: `keras.wrappers.scikit_learn.KerasClassifier(build_fn=None, **sk_params)`, which implements the Scikit-Learn classifier interface, `keras.wrappers.scikit_learn.KerasRegressor(build_fn=None, **sk_params)`, which implements the Scikit-Learn regressor interface. ### Arguments - __build_fn__: callable function or class instance - __sk_params__: model parameters & fitting parameters `build_fn` should construct, compile and return a Keras model, which will then be used to fit/predict. One of the following three values could be passed to `build_fn`: 1. A function 2. An instance of a class that implements the `__call__` method 3. None. This means you implement a class that inherits from either `KerasClassifier` or `KerasRegressor`. The `__call__` method of the present class will then be treated as the default `build_fn`. `sk_params` takes both model parameters and fitting parameters. Legal model parameters are the arguments of `build_fn`. Note that like all other estimators in scikit-learn, `build_fn` should provide default values for its arguments, so that you could create the estimator without passing any values to `sk_params`. `sk_params` could also accept parameters for calling `fit`, `predict`, `predict_proba`, and `score` methods (e.g., `epochs`, `batch_size`). fitting (predicting) parameters are selected in the following order: 1. Values passed to the dictionary arguments of `fit`, `predict`, `predict_proba`, and `score` methods 2. 
Values passed to `sk_params` 3. The default values of the `keras.models.Sequential` `fit`, `predict`, `predict_proba` and `score` methods When using scikit-learn's `grid_search` API, legal tunable parameters are those you could pass to `sk_params`, including fitting parameters. In other words, you could use `grid_search` to search for the best `batch_size` or `epochs` as well as the model parameters. Keras-2.2.4/docs/templates/optimizers.md0000644000000000116100000000237113250037310017773 0ustar rooteng00000000000000 ## Usage of optimizers An optimizer is one of the two arguments required for compiling a Keras model: ```python from keras import optimizers model = Sequential() model.add(Dense(64, kernel_initializer='uniform', input_shape=(10,))) model.add(Activation('softmax')) sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss='mean_squared_error', optimizer=sgd) ``` You can either instantiate an optimizer before passing it to `model.compile()`, as in the above example, or you can pass it by its name. In the latter case, the default parameters for the optimizer will be used. ```python # pass optimizer by name: default parameters will be used model.compile(loss='mean_squared_error', optimizer='sgd') ``` --- ## Parameters common to all Keras optimizers The parameters `clipnorm` and `clipvalue` can be used with all optimizers to control gradient clipping: ```python from keras import optimizers # All parameter gradients will be clipped to # a maximum norm of 1. sgd = optimizers.SGD(lr=0.01, clipnorm=1.) ``` ```python from keras import optimizers # All parameter gradients will be clipped to # a maximum value of 0.5 and # a minimum value of -0.5.
sgd = optimizers.SGD(lr=0.01, clipvalue=0.5) ``` --- {{autogenerated}} Keras-2.2.4/docs/templates/layers/0000755000000000116100000000000013355226624016556 5ustar rooteng00000000000000Keras-2.2.4/docs/templates/layers/writing-your-own-keras-layers.md0000644000000000116100000000627413342055016024760 0ustar rooteng00000000000000# Writing your own Keras layers For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer. Here is the skeleton of a Keras layer, **as of Keras 2.0** (if you have an older version, please upgrade). There are only three methods you need to implement: - `build(input_shape)`: this is where you will define your weights. This method must set `self.built = True` at the end, which can be done by calling `super([Layer], self).build()`. - `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor. - `compute_output_shape(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference. ```python from keras import backend as K from keras.engine.topology import Layer class MyLayer(Layer): def __init__(self, output_dim, **kwargs): self.output_dim = output_dim super(MyLayer, self).__init__(**kwargs) def build(self, input_shape): # Create a trainable weight variable for this layer. 
self.kernel = self.add_weight(name='kernel', shape=(input_shape[1], self.output_dim), initializer='uniform', trainable=True) super(MyLayer, self).build(input_shape) # Be sure to call this at the end def call(self, x): return K.dot(x, self.kernel) def compute_output_shape(self, input_shape): return (input_shape[0], self.output_dim) ``` It is also possible to define Keras layers which have multiple input tensors and multiple output tensors. To do this, you should assume that the inputs and outputs of the methods `build(input_shape)`, `call(x)` and `compute_output_shape(input_shape)` are lists. Here is an example, similar to the one above: ```python from keras import backend as K from keras.engine.topology import Layer class MyLayer(Layer): def __init__(self, output_dim, **kwargs): self.output_dim = output_dim super(MyLayer, self).__init__(**kwargs) def build(self, input_shape): assert isinstance(input_shape, list) # Create a trainable weight variable for this layer. self.kernel = self.add_weight(name='kernel', shape=(input_shape[0][1], self.output_dim), initializer='uniform', trainable=True) super(MyLayer, self).build(input_shape) # Be sure to call this at the end def call(self, x): assert isinstance(x, list) a, b = x return [K.dot(a, self.kernel) + b, K.mean(b, axis=-1)] def compute_output_shape(self, input_shape): assert isinstance(input_shape, list) shape_a, shape_b = input_shape return [(shape_a[0], self.output_dim), shape_b[:-1]] ``` The existing Keras layers provide examples of how to implement almost anything. Never hesitate to read the source code! Keras-2.2.4/docs/templates/layers/about-keras-layers.md0000644000000000116100000000243613146670577022627 0ustar rooteng00000000000000# About Keras layers All Keras layers have a number of methods in common: - `layer.get_weights()`: returns the weights of the layer as a list of Numpy arrays.
- `layer.set_weights(weights)`: sets the weights of the layer from a list of Numpy arrays (with the same shapes as the output of `get_weights`). - `layer.get_config()`: returns a dictionary containing the configuration of the layer. The layer can be reinstantiated from its config via: ```python layer = Dense(32) config = layer.get_config() reconstructed_layer = Dense.from_config(config) ``` Or: ```python from keras import layers config = layer.get_config() layer = layers.deserialize({'class_name': layer.__class__.__name__, 'config': config}) ``` If a layer has a single node (i.e. if it isn't a shared layer), you can get its input tensor, output tensor, input shape and output shape via: - `layer.input` - `layer.output` - `layer.input_shape` - `layer.output_shape` If the layer has multiple nodes (see: [the concept of layer node and shared layers](/getting-started/functional-api-guide/#the-concept-of-layer-node)), you can use the following methods: - `layer.get_input_at(node_index)` - `layer.get_output_at(node_index)` - `layer.get_input_shape_at(node_index)` - `layer.get_output_shape_at(node_index)`Keras-2.2.4/docs/templates/initializers.md0000644000000000116100000000232713146670577020323 0ustar rooteng00000000000000## Usage of initializers Initializations define the way to set the initial random weights of Keras layers. The keyword arguments used for passing initializers to layers will depend on the layer. 
Usually it is simply `kernel_initializer` and `bias_initializer`: ```python model.add(Dense(64, kernel_initializer='random_uniform', bias_initializer='zeros')) ``` ## Available initializers The following built-in initializers are available as part of the `keras.initializers` module: {{autogenerated}} An initializer may be passed as a string (must match one of the available initializers above), or as a callable: ```python from keras import initializers model.add(Dense(64, kernel_initializer=initializers.random_normal(stddev=0.01))) # also works; will use the default parameters. model.add(Dense(64, kernel_initializer='random_normal')) ``` ## Using custom initializers If passing a custom callable, then it must take the arguments `shape` (shape of the variable to initialize) and `dtype` (dtype of generated values): ```python from keras import backend as K def my_init(shape, dtype=None): return K.random_normal(shape, dtype=dtype) model.add(Dense(64, kernel_initializer=my_init)) ``` Keras-2.2.4/docs/templates/constraints.md0000644000000000116100000000150113176437624020152 0ustar rooteng00000000000000## Usage of constraints Functions from the `constraints` module allow setting constraints (e.g. non-negativity) on network parameters during optimization. The constraints are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `Conv1D`, `Conv2D` and `Conv3D` have a unified API. These layers expose 2 keyword arguments: - `kernel_constraint` for the main weights matrix - `bias_constraint` for the bias.
```python from keras.constraints import max_norm model.add(Dense(64, kernel_constraint=max_norm(2.))) ``` ## Available constraints - __max_norm(max_value=2, axis=0)__: maximum-norm constraint - __non_neg()__: non-negativity constraint - __unit_norm(axis=0)__: unit-norm constraint - __min_max_norm(min_value=0.0, max_value=1.0, rate=1.0, axis=0)__: minimum/maximum-norm constraint Keras-2.2.4/docs/templates/losses.md0000644000000000116100000000266613227311000017100 0ustar rooteng00000000000000 ## Usage of loss functions A loss function (or objective function, or optimization score function) is one of the two parameters required to compile a model: ```python model.compile(loss='mean_squared_error', optimizer='sgd') ``` ```python from keras import losses model.compile(loss=losses.mean_squared_error, optimizer='sgd') ``` You can either pass the name of an existing loss function, or pass a TensorFlow/Theano symbolic function that returns a scalar for each data-point and takes the following two arguments: - __y_true__: True labels. TensorFlow/Theano tensor. - __y_pred__: Predictions. TensorFlow/Theano tensor of the same shape as y_true. The actual optimized objective is the mean of the output array across all datapoints. For a few examples of such functions, check out the [losses source](https://github.com/keras-team/keras/blob/master/keras/losses.py). ## Available loss functions {{autogenerated}} ---- **Note**: when using the `categorical_crossentropy` loss, your targets should be in categorical format (e.g. if you have 10 classes, the target for each sample should be a 10-dimensional vector that is all-zeros except for a 1 at the index corresponding to the class of the sample). 
In order to convert *integer targets* into *categorical targets*, you can use the Keras utility `to_categorical`: ```python from keras.utils.np_utils import to_categorical categorical_labels = to_categorical(int_labels, num_classes=None) ``` Keras-2.2.4/docs/templates/models/0000755000000000116100000000000013355226624016542 5ustar rooteng00000000000000Keras-2.2.4/docs/templates/models/sequential.md0000644000000000116100000000027313306065631021233 0ustar rooteng00000000000000# The Sequential model API To get started, read [this guide to the Keras Sequential model](/getting-started/sequential-model-guide). ---- ## Sequential model methods {{autogenerated}}Keras-2.2.4/docs/templates/models/model.md0000644000000000116100000000125613306065631020163 0ustar rooteng00000000000000# Model class API In the functional API, given some input tensor(s) and output tensor(s), you can instantiate a `Model` via: ```python from keras.models import Model from keras.layers import Input, Dense a = Input(shape=(32,)) b = Dense(32)(a) model = Model(inputs=a, outputs=b) ``` This model will include all layers required in the computation of `b` given `a`. In the case of multi-input or multi-output models, you can use lists as well: ```python model = Model(inputs=[a1, a2], outputs=[b1, b2, b3]) ``` For a detailed introduction of what `Model` can do, read [this guide to the Keras functional API](/getting-started/functional-api-guide). ## Methods {{autogenerated}} Keras-2.2.4/docs/templates/models/about-keras-models.md0000644000000000116100000001111213326715636022562 0ustar rooteng00000000000000# About Keras models There are two main types of models available in Keras: [the Sequential model](/models/sequential), and [the Model class used with the functional API](/models/model). These models have a number of methods and attributes in common: - `model.layers` is a flattened list of the layers comprising the model. - `model.inputs` is the list of input tensors of the model. 
- `model.outputs` is the list of output tensors of the model. - `model.summary()` prints a summary representation of your model. Shortcut for [utils.print_summary](/utils/#print_summary) - `model.get_config()` returns a dictionary containing the configuration of the model. The model can be reinstantiated from its config via: ```python config = model.get_config() model = Model.from_config(config) # or, for Sequential: model = Sequential.from_config(config) ``` - `model.get_weights()` returns a list of all weight tensors in the model, as Numpy arrays. - `model.set_weights(weights)` sets the values of the weights of the model, from a list of Numpy arrays. The arrays in the list should have the same shape as those returned by `get_weights()`. - `model.to_json()` returns a representation of the model as a JSON string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the JSON string via: ```python from keras.models import model_from_json json_string = model.to_json() model = model_from_json(json_string) ``` - `model.to_yaml()` returns a representation of the model as a YAML string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the YAML string via: ```python from keras.models import model_from_yaml yaml_string = model.to_yaml() model = model_from_yaml(yaml_string) ``` - `model.save_weights(filepath)` saves the weights of the model as a HDF5 file. - `model.load_weights(filepath, by_name=False)` loads the weights of the model from a HDF5 file (created by `save_weights`). By default, the architecture is expected to be unchanged. To load weights into a different architecture (with some layers in common), use `by_name=True` to load only those layers with the same name. 
Note: Please also see [How can I install HDF5 or h5py to save my models in Keras?](/getting-started/faq/#how-can-i-install-hdf5-or-h5py-to-save-my-models-in-keras) in the FAQ for instructions on how to install `h5py`. ## Model subclassing In addition to these two types of models, you may create your own fully-customizable models by subclassing the `Model` class and implementing your own forward pass in the `call` method (the `Model` subclassing API was introduced in Keras 2.2.0). Here's an example of a simple multi-layer perceptron model written as a `Model` subclass: ```python import keras class SimpleMLP(keras.Model): def __init__(self, use_bn=False, use_dp=False, num_classes=10): super(SimpleMLP, self).__init__(name='mlp') self.use_bn = use_bn self.use_dp = use_dp self.num_classes = num_classes self.dense1 = keras.layers.Dense(32, activation='relu') self.dense2 = keras.layers.Dense(num_classes, activation='softmax') if self.use_dp: self.dp = keras.layers.Dropout(0.5) if self.use_bn: self.bn = keras.layers.BatchNormalization(axis=-1) def call(self, inputs): x = self.dense1(inputs) if self.use_dp: x = self.dp(x) if self.use_bn: x = self.bn(x) return self.dense2(x) model = SimpleMLP() model.compile(...) model.fit(...) ``` Layers are defined in `__init__(self, ...)`, and the forward pass is specified in `call(self, inputs)`. In `call`, you may specify custom losses by calling `self.add_loss(loss_tensor)` (like you would in a custom layer). In subclassed models, the model's topology is defined as Python code (rather than as a static graph of layers). That means the model's topology cannot be inspected or serialized. As a result, the following methods and attributes are **not available for subclassed models**: - `model.inputs` and `model.outputs`. - `model.to_yaml()` and `model.to_json()` - `model.get_config()` and `model.save()`. **Key point:** use the right API for the job.
The `Model` subclassing API can provide you with greater flexibility for implementing complex models, but it comes at a cost (in addition to these missing features): it is more verbose, more complex, and has more opportunities for user errors. If possible, prefer using the functional API, which is more user-friendly. Keras-2.2.4/docs/templates/backend.md0000644000000000116100000001075613227311000017156 0ustar rooteng00000000000000# Keras backends ## What is a "backend"? Keras is a model-level library, providing high-level building blocks for developing deep learning models. It does not itself handle low-level operations such as tensor products, convolutions and so on. Instead, it relies on a specialized, well-optimized tensor manipulation library to do so, serving as the "backend engine" of Keras. Rather than picking one single tensor library and making the implementation of Keras tied to that library, Keras handles the problem in a modular way, and several different backend engines can be plugged seamlessly into Keras. At this time, Keras has three backend implementations available: the **TensorFlow** backend, the **Theano** backend, and the **CNTK** backend. - [TensorFlow](http://www.tensorflow.org/) is an open-source symbolic tensor manipulation framework developed by Google. - [Theano](http://deeplearning.net/software/theano/) is an open-source symbolic tensor manipulation framework developed by LISA Lab at Université de Montréal. - [CNTK](https://www.microsoft.com/en-us/cognitive-toolkit/) is an open-source toolkit for deep learning developed by Microsoft. In the future, we are likely to add more backend options. ---- ## Switching from one backend to another If you have run Keras at least once, you will find the Keras configuration file at: `$HOME/.keras/keras.json` If it isn't there, you can create it. **NOTE for Windows Users:** Please replace `$HOME` with `%USERPROFILE%`.
The default configuration file looks like this: ``` { "image_data_format": "channels_last", "epsilon": 1e-07, "floatx": "float32", "backend": "tensorflow" } ``` Simply change the field `backend` to `"theano"`, `"tensorflow"`, or `"cntk"`, and Keras will use the new configuration next time you run any Keras code. You can also define the environment variable ``KERAS_BACKEND`` and this will override what is defined in your config file : ```bash KERAS_BACKEND=tensorflow python -c "from keras import backend" Using TensorFlow backend. ``` ---- ## keras.json details The `keras.json` configuration file contains the following settings: ``` { "image_data_format": "channels_last", "epsilon": 1e-07, "floatx": "float32", "backend": "tensorflow" } ``` You can change these settings by editing `$HOME/.keras/keras.json`. * `image_data_format`: String, either `"channels_last"` or `"channels_first"`. It specifies which data format convention Keras will follow. (`keras.backend.image_data_format()` returns it.) - For 2D data (e.g. image), `"channels_last"` assumes `(rows, cols, channels)` while `"channels_first"` assumes `(channels, rows, cols)`. - For 3D data, `"channels_last"` assumes `(conv_dim1, conv_dim2, conv_dim3, channels)` while `"channels_first"` assumes `(channels, conv_dim1, conv_dim2, conv_dim3)`. * `epsilon`: Float, a numeric fuzzing constant used to avoid dividing by zero in some operations. * `floatx`: String, `"float16"`, `"float32"`, or `"float64"`. Default float precision. * `backend`: String, `"tensorflow"`, `"theano"`, or `"cntk"`. ---- ## Using the abstract Keras backend to write new code If you want the Keras modules you write to be compatible with both Theano (`th`) and TensorFlow (`tf`), you have to write them via the abstract Keras backend API. Here's an intro. You can import the backend module via: ```python from keras import backend as K ``` The code below instantiates an input placeholder. 
It's equivalent to `tf.placeholder()` or `th.tensor.matrix()`, `th.tensor.tensor3()`, etc. ```python inputs = K.placeholder(shape=(2, 4, 5)) # also works: inputs = K.placeholder(shape=(None, 4, 5)) # also works: inputs = K.placeholder(ndim=3) ``` The code below instantiates a variable. It's equivalent to `tf.Variable()` or `th.shared()`. ```python import numpy as np val = np.random.random((3, 4, 5)) var = K.variable(value=val) # all-zeros variable: var = K.zeros(shape=(3, 4, 5)) # all-ones: var = K.ones(shape=(3, 4, 5)) ``` Most tensor operations you will need can be done as you would in TensorFlow or Theano: ```python # Initializing Tensors with Random Numbers b = K.random_uniform_variable(shape=(3, 4), low=0, high=1) # Uniform distribution c = K.random_normal_variable(shape=(3, 4), mean=0, scale=1) # Gaussian distribution d = K.random_normal_variable(shape=(3, 4), mean=0, scale=1) # Tensor Arithmetic a = b + c * K.abs(d) c = K.dot(a, K.transpose(b)) a = K.sum(b, axis=1) a = K.softmax(b) a = K.concatenate([b, c], axis=-1) # etc... ``` ---- ## Backend functions {{autogenerated}} Keras-2.2.4/docs/templates/getting-started/0000755000000000116100000000000013355226624020364 5ustar rooteng00000000000000Keras-2.2.4/docs/templates/getting-started/functional-api-guide.md0000644000000000116100000004062213254042211024700 0ustar rooteng00000000000000# Getting started with the Keras functional API The Keras functional API is the way to go for defining complex models, such as multi-output models, directed acyclic graphs, or models with shared layers. This guide assumes that you are already familiar with the `Sequential` model. Let's start with something simple. ----- ## First example: a densely-connected network The `Sequential` model is probably a better choice to implement such a network, but it helps to start with something really simple. 
- A layer instance is callable (on a tensor), and it returns a tensor - Input tensor(s) and output tensor(s) can then be used to define a `Model` - Such a model can be trained just like Keras `Sequential` models. ```python from keras.layers import Input, Dense from keras.models import Model # This returns a tensor inputs = Input(shape=(784,)) # a layer instance is callable on a tensor, and returns a tensor x = Dense(64, activation='relu')(inputs) x = Dense(64, activation='relu')(x) predictions = Dense(10, activation='softmax')(x) # This creates a model that includes # the Input layer and three Dense layers model = Model(inputs=inputs, outputs=predictions) model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) model.fit(data, labels) # starts training ``` ----- ## All models are callable, just like layers With the functional API, it is easy to reuse trained models: you can treat any model as if it were a layer, by calling it on a tensor. Note that by calling a model you aren't just reusing the *architecture* of the model, you are also reusing its weights. ```python x = Input(shape=(784,)) # This works, and returns the 10-way softmax we defined above. y = model(x) ``` This can allow, for instance, to quickly create models that can process *sequences* of inputs. You could turn an image classification model into a video classification model, in just one line. ```python from keras.layers import TimeDistributed # Input tensor for sequences of 20 timesteps, # each containing a 784-dimensional vector input_sequences = Input(shape=(20, 784)) # This applies our previous model to every timestep in the input sequences. # the output of the previous model was a 10-way softmax, # so the output of the layer below will be a sequence of 20 vectors of size 10. 
processed_sequences = TimeDistributed(model)(input_sequences) ``` ----- ## Multi-input and multi-output models Here's a good use case for the functional API: models with multiple inputs and outputs. The functional API makes it easy to manipulate a large number of intertwined datastreams. Let's consider the following model. We seek to predict how many retweets and likes a news headline will receive on Twitter. The main input to the model will be the headline itself, as a sequence of words, but to spice things up, our model will also have an auxiliary input, receiving extra data such as the time of day when the headline was posted, etc. The model will also be supervised via two loss functions. Using the main loss function earlier in a model is a good regularization mechanism for deep models. Here's what our model looks like: multi-input-multi-output-graph Let's implement it with the functional API. The main input will receive the headline, as a sequence of integers (each integer encodes a word). The integers will be between 1 and 10,000 (a vocabulary of 10,000 words) and the sequences will be 100 words long. ```python from keras.layers import Input, Embedding, LSTM, Dense from keras.models import Model # Headline input: meant to receive sequences of 100 integers, between 1 and 10000. # Note that we can name any layer by passing it a "name" argument. main_input = Input(shape=(100,), dtype='int32', name='main_input') # This embedding layer will encode the input sequence # into a sequence of dense 512-dimensional vectors. x = Embedding(output_dim=512, input_dim=10000, input_length=100)(main_input) # A LSTM will transform the vector sequence into a single vector, # containing information about the entire sequence lstm_out = LSTM(32)(x) ``` Here we insert the auxiliary loss, allowing the LSTM and Embedding layer to be trained smoothly even though the main loss will be much higher in the model. 
```python auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_out) ``` At this point, we feed into the model our auxiliary input data by concatenating it with the LSTM output: ```python auxiliary_input = Input(shape=(5,), name='aux_input') x = keras.layers.concatenate([lstm_out, auxiliary_input]) # We stack a deep densely-connected network on top x = Dense(64, activation='relu')(x) x = Dense(64, activation='relu')(x) x = Dense(64, activation='relu')(x) # And finally we add the main logistic regression layer main_output = Dense(1, activation='sigmoid', name='main_output')(x) ``` This defines a model with two inputs and two outputs: ```python model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output, auxiliary_output]) ``` We compile the model and assign a weight of 0.2 to the auxiliary loss. To specify different `loss_weights` or `loss` for each different output, you can use a list or a dictionary. Here we pass a single loss as the `loss` argument, so the same loss will be used on all outputs. ```python model.compile(optimizer='rmsprop', loss='binary_crossentropy', loss_weights=[1., 0.2]) ``` We can train the model by passing it lists of input arrays and target arrays: ```python model.fit([headline_data, additional_data], [labels, labels], epochs=50, batch_size=32) ``` Since our inputs and outputs are named (we passed them a "name" argument), we could also have compiled the model via: ```python model.compile(optimizer='rmsprop', loss={'main_output': 'binary_crossentropy', 'aux_output': 'binary_crossentropy'}, loss_weights={'main_output': 1., 'aux_output': 0.2}) # And trained it via: model.fit({'main_input': headline_data, 'aux_input': additional_data}, {'main_output': labels, 'aux_output': labels}, epochs=50, batch_size=32) ``` ----- ## Shared layers Another good use for the functional API is building models that use shared layers. Let's take a look at shared layers. Let's consider a dataset of tweets.
We want to build a model that can tell whether two tweets are from the same person or not (this can allow us to compare users by the similarity of their tweets, for instance). One way to achieve this is to build a model that encodes two tweets into two vectors, concatenates the vectors and then adds a logistic regression; this outputs a probability that the two tweets share the same author. The model would then be trained on positive tweet pairs and negative tweet pairs. Because the problem is symmetric, the mechanism that encodes the first tweet should be reused (weights and all) to encode the second tweet. Here we use a shared LSTM layer to encode the tweets. Let's build this with the functional API. We will take as input for a tweet a binary matrix of shape `(280, 256)`, i.e. a sequence of 280 vectors of size 256, where each dimension in the 256-dimensional vector encodes the presence/absence of a character (out of an alphabet of 256 frequent characters). ```python import keras from keras.layers import Input, LSTM, Dense from keras.models import Model tweet_a = Input(shape=(280, 256)) tweet_b = Input(shape=(280, 256)) ``` To share a layer across different inputs, simply instantiate the layer once, then call it on as many inputs as you want: ```python # This layer can take as input a matrix # and will return a vector of size 64 shared_lstm = LSTM(64) # When we reuse the same layer instance # multiple times, the weights of the layer # are also being reused # (it is effectively *the same* layer) encoded_a = shared_lstm(tweet_a) encoded_b = shared_lstm(tweet_b) # We can then concatenate the two vectors: merged_vector = keras.layers.concatenate([encoded_a, encoded_b], axis=-1) # And add a logistic regression on top predictions = Dense(1, activation='sigmoid')(merged_vector) # We define a trainable model linking the # tweet inputs to the predictions model = Model(inputs=[tweet_a, tweet_b], outputs=predictions) model.compile(optimizer='rmsprop', 
loss='binary_crossentropy', metrics=['accuracy']) model.fit([data_a, data_b], labels, epochs=10) ``` Let's pause to take a look at how to read the shared layer's output or output shape. ----- ## The concept of layer "node" Whenever you are calling a layer on some input, you are creating a new tensor (the output of the layer), and you are adding a "node" to the layer, linking the input tensor to the output tensor. When you are calling the same layer multiple times, that layer owns multiple nodes indexed as 0, 1, 2... In previous versions of Keras, you could obtain the output tensor of a layer instance via `layer.get_output()`, or its output shape via `layer.output_shape`. You still can (except `get_output()` has been replaced by the property `output`). But what if a layer is connected to multiple inputs? As long as a layer is only connected to one input, there is no confusion, and `.output` will return the one output of the layer: ```python a = Input(shape=(280, 256)) lstm = LSTM(32) encoded_a = lstm(a) assert lstm.output == encoded_a ``` Not so if the layer has multiple inputs: ```python a = Input(shape=(280, 256)) b = Input(shape=(280, 256)) lstm = LSTM(32) encoded_a = lstm(a) encoded_b = lstm(b) lstm.output ``` ``` >> AttributeError: Layer lstm_1 has multiple inbound nodes, hence the notion of "layer output" is ill-defined. Use `get_output_at(node_index)` instead. ``` Okay then. The following works: ```python assert lstm.get_output_at(0) == encoded_a assert lstm.get_output_at(1) == encoded_b ``` Simple enough, right? The same is true for the properties `input_shape` and `output_shape`: as long as the layer has only one node, or as long as all nodes have the same input/output shape, then the notion of "layer output/input shape" is well defined, and that one shape will be returned by `layer.output_shape`/`layer.input_shape`. 
But if, for instance, you apply the same `Conv2D` layer to an input of shape `(32, 32, 3)`, and then to an input of shape `(64, 64, 3)`, the layer will have multiple input/output shapes, and you will have to fetch them by specifying the index of the node they belong to: ```python a = Input(shape=(32, 32, 3)) b = Input(shape=(64, 64, 3)) conv = Conv2D(16, (3, 3), padding='same') conved_a = conv(a) # Only one input so far, the following will work: assert conv.input_shape == (None, 32, 32, 3) conved_b = conv(b) # now the `.input_shape` property wouldn't work, but this does: assert conv.get_input_shape_at(0) == (None, 32, 32, 3) assert conv.get_input_shape_at(1) == (None, 64, 64, 3) ``` ----- ## More examples Code examples are still the best way to get started, so here are a few more. ### Inception module For more information about the Inception architecture, see [Going Deeper with Convolutions](http://arxiv.org/abs/1409.4842). ```python from keras.layers import Conv2D, MaxPooling2D, Input input_img = Input(shape=(256, 256, 3)) tower_1 = Conv2D(64, (1, 1), padding='same', activation='relu')(input_img) tower_1 = Conv2D(64, (3, 3), padding='same', activation='relu')(tower_1) tower_2 = Conv2D(64, (1, 1), padding='same', activation='relu')(input_img) tower_2 = Conv2D(64, (5, 5), padding='same', activation='relu')(tower_2) tower_3 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_img) tower_3 = Conv2D(64, (1, 1), padding='same', activation='relu')(tower_3) output = keras.layers.concatenate([tower_1, tower_2, tower_3], axis=1) ``` ### Residual connection on a convolution layer For more information about residual networks, see [Deep Residual Learning for Image Recognition](http://arxiv.org/abs/1512.03385). ```python from keras.layers import Conv2D, Input # input tensor for a 3-channel 256x256 image x = Input(shape=(256, 256, 3)) # 3x3 conv with 3 output channels (same as input channels) y = Conv2D(3, (3, 3), padding='same')(x) # this returns x + y. 
z = keras.layers.add([x, y]) ``` ### Shared vision model This model reuses the same image-processing module on two inputs, to classify whether two MNIST digits are the same digit or different digits. ```python from keras.layers import Conv2D, MaxPooling2D, Input, Dense, Flatten from keras.models import Model # First, define the vision modules digit_input = Input(shape=(27, 27, 1)) x = Conv2D(64, (3, 3))(digit_input) x = Conv2D(64, (3, 3))(x) x = MaxPooling2D((2, 2))(x) out = Flatten()(x) vision_model = Model(digit_input, out) # Then define the tell-digits-apart model digit_a = Input(shape=(27, 27, 1)) digit_b = Input(shape=(27, 27, 1)) # The vision model will be shared, weights and all out_a = vision_model(digit_a) out_b = vision_model(digit_b) concatenated = keras.layers.concatenate([out_a, out_b]) out = Dense(1, activation='sigmoid')(concatenated) classification_model = Model([digit_a, digit_b], out) ``` ### Visual question answering model This model can select the correct one-word answer when asked a natural-language question about a picture. It works by encoding the question into a vector, encoding the image into a vector, concatenating the two, and training on top a logistic regression over some vocabulary of potential answers. ```python from keras.layers import Conv2D, MaxPooling2D, Flatten from keras.layers import Input, LSTM, Embedding, Dense from keras.models import Model, Sequential # First, let's define a vision model using a Sequential model. # This model will encode an image into a vector. 
vision_model = Sequential() vision_model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(224, 224, 3))) vision_model.add(Conv2D(64, (3, 3), activation='relu')) vision_model.add(MaxPooling2D((2, 2))) vision_model.add(Conv2D(128, (3, 3), activation='relu', padding='same')) vision_model.add(Conv2D(128, (3, 3), activation='relu')) vision_model.add(MaxPooling2D((2, 2))) vision_model.add(Conv2D(256, (3, 3), activation='relu', padding='same')) vision_model.add(Conv2D(256, (3, 3), activation='relu')) vision_model.add(Conv2D(256, (3, 3), activation='relu')) vision_model.add(MaxPooling2D((2, 2))) vision_model.add(Flatten()) # Now let's get a tensor with the output of our vision model: image_input = Input(shape=(224, 224, 3)) encoded_image = vision_model(image_input) # Next, let's define a language model to encode the question into a vector. # Each question will be at most 100 word long, # and we will index words as integers from 1 to 9999. question_input = Input(shape=(100,), dtype='int32') embedded_question = Embedding(input_dim=10000, output_dim=256, input_length=100)(question_input) encoded_question = LSTM(256)(embedded_question) # Let's concatenate the question vector and the image vector: merged = keras.layers.concatenate([encoded_question, encoded_image]) # And let's train a logistic regression over 1000 words on top: output = Dense(1000, activation='softmax')(merged) # This is our final model: vqa_model = Model(inputs=[image_input, question_input], outputs=output) # The next stage would be training this model on actual data. ``` ### Video question answering model Now that we have trained our image QA model, we can quickly turn it into a video QA model. With appropriate training, you will be able to show it a short video (e.g. 100-frame human action) and ask a natural language question about the video (e.g. "what sport is the boy playing?" -> "football"). 
```python from keras.layers import TimeDistributed video_input = Input(shape=(100, 224, 224, 3)) # This is our video encoded via the previously trained vision_model (weights are reused) encoded_frame_sequence = TimeDistributed(vision_model)(video_input) # the output will be a sequence of vectors encoded_video = LSTM(256)(encoded_frame_sequence) # the output will be a vector # This is a model-level representation of the question encoder, reusing the same weights as before: question_encoder = Model(inputs=question_input, outputs=encoded_question) # Let's use it to encode the question: video_question_input = Input(shape=(100,), dtype='int32') encoded_video_question = question_encoder(video_question_input) # And this is our video question answering model: merged = keras.layers.concatenate([encoded_video, encoded_video_question]) output = Dense(1000, activation='softmax')(merged) video_qa_model = Model(inputs=[video_input, video_question_input], outputs=output) ``` Keras-2.2.4/docs/templates/getting-started/faq.md0000644000000000116100000006154013342055016021452 0ustar rooteng00000000000000# Keras FAQ: Frequently Asked Keras Questions - [How should I cite Keras?](#how-should-i-cite-keras) - [How can I run Keras on GPU?](#how-can-i-run-keras-on-gpu) - [How can I run a Keras model on multiple GPUs?](#how-can-i-run-a-keras-model-on-multiple-gpus) - [What does "sample", "batch", "epoch" mean?](#what-does-sample-batch-epoch-mean) - [How can I save a Keras model?](#how-can-i-save-a-keras-model) - [Why is the training loss much higher than the testing loss?](#why-is-the-training-loss-much-higher-than-the-testing-loss) - [How can I obtain the output of an intermediate layer?](#how-can-i-obtain-the-output-of-an-intermediate-layer) - [How can I use Keras with datasets that don't fit in memory?](#how-can-i-use-keras-with-datasets-that-dont-fit-in-memory) - [How can I interrupt training when the validation loss isn't decreasing 
anymore?](#how-can-i-interrupt-training-when-the-validation-loss-isnt-decreasing-anymore) - [How is the validation split computed?](#how-is-the-validation-split-computed) - [Is the data shuffled during training?](#is-the-data-shuffled-during-training) - [How can I record the training / validation loss / accuracy at each epoch?](#how-can-i-record-the-training-validation-loss-accuracy-at-each-epoch) - [How can I "freeze" layers?](#how-can-i-freeze-keras-layers) - [How can I use stateful RNNs?](#how-can-i-use-stateful-rnns) - [How can I remove a layer from a Sequential model?](#how-can-i-remove-a-layer-from-a-sequential-model) - [How can I use pre-trained models in Keras?](#how-can-i-use-pre-trained-models-in-keras) - [How can I use HDF5 inputs with Keras?](#how-can-i-use-hdf5-inputs-with-keras) - [Where is the Keras configuration file stored?](#where-is-the-keras-configuration-file-stored) - [How can I obtain reproducible results using Keras during development?](#how-can-i-obtain-reproducible-results-using-keras-during-development) - [How can I install HDF5 or h5py to save my models in Keras?](#how-can-i-install-hdf5-or-h5py-to-save-my-models-in-keras) --- ### How should I cite Keras? Please cite Keras in your publications if it helps your research. Here is an example BibTeX entry: ``` @misc{chollet2015keras, title={Keras}, author={Chollet, Fran\c{c}ois and others}, year={2015}, howpublished={\url{https://keras.io}}, } ``` --- ### How can I run Keras on GPU? If you are running on the **TensorFlow** or **CNTK** backends, your code will automatically run on GPU if any available GPU is detected. If you are running on the **Theano** backend, you can use one of the following methods: **Method 1**: use Theano flags. ```bash THEANO_FLAGS=device=gpu,floatX=float32 python my_keras_script.py ``` The name 'gpu' might have to be changed depending on your device's identifier (e.g. `gpu0`, `gpu1`, etc). 
**Method 2**: set up your `.theanorc`: [Instructions](http://deeplearning.net/software/theano/library/config.html) **Method 3**: manually set `theano.config.device`, `theano.config.floatX` at the beginning of your code: ```python import theano theano.config.device = 'gpu' theano.config.floatX = 'float32' ``` --- ### How can I run a Keras model on multiple GPUs? We recommend doing so using the **TensorFlow** backend. There are two ways to run a single model on multiple GPUs: **data parallelism** and **device parallelism**. In most cases, what you need is most likely data parallelism. #### Data parallelism Data parallelism consists in replicating the target model once on each device, and using each replica to process a different fraction of the input data. Keras has a built-in utility, `keras.utils.multi_gpu_model`, which can produce a data-parallel version of any model, and achieves quasi-linear speedup on up to 8 GPUs. For more information, see the documentation for [multi_gpu_model](/utils/#multi_gpu_model). Here is a quick example: ```python from keras.utils import multi_gpu_model # Replicates `model` on 8 GPUs. # This assumes that your machine has 8 available GPUs. parallel_model = multi_gpu_model(model, gpus=8) parallel_model.compile(loss='categorical_crossentropy', optimizer='rmsprop') # This `fit` call will be distributed on 8 GPUs. # Since the batch size is 256, each GPU will process 32 samples. parallel_model.fit(x, y, epochs=20, batch_size=256) ``` #### Device parallelism Device parallelism consists in running different parts of a same model on different devices. It works best for models that have a parallel architecture, e.g. a model with two branches. This can be achieved by using TensorFlow device scopes. 
Here is a quick example: ```python # Model where a shared LSTM is used to encode two different sequences in parallel input_a = keras.Input(shape=(140, 256)) input_b = keras.Input(shape=(140, 256)) shared_lstm = keras.layers.LSTM(64) # Process the first sequence on one GPU with tf.device('/gpu:0'): encoded_a = shared_lstm(input_a) # Process the next sequence on another GPU with tf.device('/gpu:1'): encoded_b = shared_lstm(input_b) # Concatenate results on CPU with tf.device('/cpu:0'): merged_vector = keras.layers.concatenate([encoded_a, encoded_b], axis=-1) ``` --- ### What does "sample", "batch", "epoch" mean? Below are some common definitions that are necessary to know and understand to correctly utilize Keras: - **Sample**: one element of a dataset. - *Example:* one image is a **sample** in a convolutional network - *Example:* one audio file is a **sample** for a speech recognition model - **Batch**: a set of *N* samples. The samples in a **batch** are processed independently, in parallel. If training, a batch results in only one update to the model. - A **batch** generally approximates the distribution of the input data better than a single input. The larger the batch, the better the approximation; however, it is also true that the batch will take longer to process and will still result in only one update. For inference (evaluate/predict), it is recommended to pick a batch size that is as large as you can afford without going out of memory (since larger batches will usually result in faster evaluation/prediction). - **Epoch**: an arbitrary cutoff, generally defined as "one pass over the entire dataset", used to separate training into distinct phases, which is useful for logging and periodic evaluation. - When using `validation_data` or `validation_split` with the `fit` method of Keras models, evaluation will be run at the end of every **epoch**.
- Within Keras, there is the ability to add [callbacks](https://keras.io/callbacks/) specifically designed to be run at the end of an **epoch**. Examples of these are learning rate changes and model checkpointing (saving). --- ### How can I save a Keras model? #### Saving/loading whole models (architecture + weights + optimizer state) *It is not recommended to use pickle or cPickle to save a Keras model.* You can use `model.save(filepath)` to save a Keras model into a single HDF5 file which will contain: - the architecture of the model, allowing to re-create the model - the weights of the model - the training configuration (loss, optimizer) - the state of the optimizer, allowing to resume training exactly where you left off. You can then use `keras.models.load_model(filepath)` to reinstantiate your model. `load_model` will also take care of compiling the model using the saved training configuration (unless the model was never compiled in the first place). Please also see [How can I install HDF5 or h5py to save my models in Keras?](#how-can-i-install-hdf5-or-h5py-to-save-my-models-in-keras) for instructions on how to install `h5py`. Example: ```python from keras.models import load_model model.save('my_model.h5') # creates a HDF5 file 'my_model.h5' del model # deletes the existing model # returns a compiled model # identical to the previous one model = load_model('my_model.h5') ``` #### Saving/loading only a model's architecture If you only need to save the **architecture of a model**, and not its weights or its training configuration, you can do: ```python # save as JSON json_string = model.to_json() # save as YAML yaml_string = model.to_yaml() ``` The generated JSON / YAML files are human-readable and can be manually edited if needed. 
You can then build a fresh model from this data: ```python # model reconstruction from JSON: from keras.models import model_from_json model = model_from_json(json_string) # model reconstruction from YAML from keras.models import model_from_yaml model = model_from_yaml(yaml_string) ``` #### Saving/loading only a model's weights If you need to save the **weights of a model**, you can do so in HDF5 with the code below. ```python model.save_weights('my_model_weights.h5') ``` Assuming you have code for instantiating your model, you can then load the weights you saved into a model with the *same* architecture: ```python model.load_weights('my_model_weights.h5') ``` If you need to load weights into a *different* architecture (with some layers in common), for instance for fine-tuning or transfer-learning, you can load weights by *layer name*: ```python model.load_weights('my_model_weights.h5', by_name=True) ``` Please also see [How can I install HDF5 or h5py to save my models in Keras?](#how-can-i-install-hdf5-or-h5py-to-save-my-models-in-keras) for instructions on how to install `h5py`. For example: ```python """ Assuming the original model looks like this: model = Sequential() model.add(Dense(2, input_dim=3, name='dense_1')) model.add(Dense(3, name='dense_2')) ... model.save_weights(fname) """ # new model model = Sequential() model.add(Dense(2, input_dim=3, name='dense_1')) # will be loaded model.add(Dense(10, name='new_dense')) # will not be loaded # load weights from first model; will only affect the first layer, dense_1. 
model.load_weights(fname, by_name=True) ``` #### Handling custom layers (or other custom objects) in saved models If the model you want to load includes custom layers or other custom classes or functions, you can pass them to the loading mechanism via the `custom_objects` argument: ```python from keras.models import load_model # Assuming your model includes instance of an "AttentionLayer" class model = load_model('my_model.h5', custom_objects={'AttentionLayer': AttentionLayer}) ``` Alternatively, you can use a [custom object scope](https://keras.io/utils/#customobjectscope): ```python from keras.utils import CustomObjectScope with CustomObjectScope({'AttentionLayer': AttentionLayer}): model = load_model('my_model.h5') ``` Custom objects handling works the same way for `load_model`, `model_from_json`, `model_from_yaml`: ```python from keras.models import model_from_json model = model_from_json(json_string, custom_objects={'AttentionLayer': AttentionLayer}) ``` --- ### Why is the training loss much higher than the testing loss? A Keras model has two modes: training and testing. Regularization mechanisms, such as Dropout and L1/L2 weight regularization, are turned off at testing time. Besides, the training loss is the average of the losses over each batch of training data. Because your model is changing over time, the loss over the first batches of an epoch is generally higher than over the last batches. On the other hand, the testing loss for an epoch is computed using the model as it is at the end of the epoch, resulting in a lower loss. --- ### How can I obtain the output of an intermediate layer? One simple way is to create a new `Model` that will output the layers that you are interested in: ```python from keras.models import Model model = ... 
# create the original model layer_name = 'my_layer' intermediate_layer_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output) intermediate_output = intermediate_layer_model.predict(data) ``` Alternatively, you can build a Keras function that will return the output of a certain layer given a certain input, for example: ```python from keras import backend as K # with a Sequential model get_3rd_layer_output = K.function([model.layers[0].input], [model.layers[3].output]) layer_output = get_3rd_layer_output([x])[0] ``` Similarly, you could build a Theano and TensorFlow function directly. Note that if your model has a different behavior in training and testing phase (e.g. if it uses `Dropout`, `BatchNormalization`, etc.), you will need to pass the learning phase flag to your function: ```python get_3rd_layer_output = K.function([model.layers[0].input, K.learning_phase()], [model.layers[3].output]) # output in test mode = 0 layer_output = get_3rd_layer_output([x, 0])[0] # output in train mode = 1 layer_output = get_3rd_layer_output([x, 1])[0] ``` --- ### How can I use Keras with datasets that don't fit in memory? You can do batch training using `model.train_on_batch(x, y)` and `model.test_on_batch(x, y)`. See the [models documentation](/models/sequential). Alternatively, you can write a generator that yields batches of training data and use the method `model.fit_generator(data_generator, steps_per_epoch, epochs)`. You can see batch training in action in our [CIFAR10 example](https://github.com/keras-team/keras/blob/master/examples/cifar10_cnn.py). --- ### How can I interrupt training when the validation loss isn't decreasing anymore? You can use an `EarlyStopping` callback: ```python from keras.callbacks import EarlyStopping early_stopping = EarlyStopping(monitor='val_loss', patience=2) model.fit(x, y, validation_split=0.2, callbacks=[early_stopping]) ``` Find out more in the [callbacks documentation](/callbacks). 
--- ### How is the validation split computed? If you set the `validation_split` argument in `model.fit` to e.g. 0.1, then the validation data used will be the *last 10%* of the data. If you set it to 0.25, it will be the last 25% of the data, etc. Note that the data isn't shuffled before extracting the validation split, so the validation is literally just the *last* x% of samples in the input you passed. The same validation set is used for all epochs (within the same call to `fit`). --- ### Is the data shuffled during training? Yes, if the `shuffle` argument in `model.fit` is set to `True` (which is the default), the training data will be randomly shuffled at each epoch. Validation data is never shuffled. --- ### How can I record the training / validation loss / accuracy at each epoch? The `model.fit` method returns a `History` callback, which has a `history` attribute containing the lists of successive losses and other metrics. ```python hist = model.fit(x, y, validation_split=0.2) print(hist.history) ``` --- ### How can I "freeze" Keras layers? To "freeze" a layer means to exclude it from training, i.e. its weights will never be updated. This is useful in the context of fine-tuning a model, or using fixed embeddings for a text input. You can pass a `trainable` argument (boolean) to a layer constructor to set a layer to be non-trainable: ```python frozen_layer = Dense(32, trainable=False) ``` Additionally, you can set the `trainable` property of a layer to `True` or `False` after instantiation. For this to take effect, you will need to call `compile()` on your model after modifying the `trainable` property.
Here's an example: ```python x = Input(shape=(32,)) layer = Dense(32) layer.trainable = False y = layer(x) frozen_model = Model(x, y) # in the model below, the weights of `layer` will not be updated during training frozen_model.compile(optimizer='rmsprop', loss='mse') layer.trainable = True trainable_model = Model(x, y) # with this model the weights of the layer will be updated during training # (which will also affect the above model since it uses the same layer instance) trainable_model.compile(optimizer='rmsprop', loss='mse') frozen_model.fit(data, labels) # this does NOT update the weights of `layer` trainable_model.fit(data, labels) # this updates the weights of `layer` ``` --- ### How can I use stateful RNNs? Making a RNN stateful means that the states for the samples of each batch will be reused as initial states for the samples in the next batch. When using stateful RNNs, it is therefore assumed that: - all batches have the same number of samples - If `x1` and `x2` are successive batches of samples, then `x2[i]` is the follow-up sequence to `x1[i]`, for every `i`. To use statefulness in RNNs, you need to: - explicitly specify the batch size you are using, by passing a `batch_size` argument to the first layer in your model. E.g. `batch_size=32` for a 32-samples batch of sequences of 10 timesteps with 16 features per timestep. - set `stateful=True` in your RNN layer(s). - specify `shuffle=False` when calling fit(). 
To reset the states accumulated: - use `model.reset_states()` to reset the states of all layers in the model - use `layer.reset_states()` to reset the states of a specific stateful RNN layer Example: ```python x # this is our input data, of shape (32, 21, 16) # we will feed it to our model in sequences of length 10 model = Sequential() model.add(LSTM(32, input_shape=(10, 16), batch_size=32, stateful=True)) model.add(Dense(16, activation='softmax')) model.compile(optimizer='rmsprop', loss='categorical_crossentropy') # we train the network to predict the 11th timestep given the first 10: model.train_on_batch(x[:, :10, :], np.reshape(x[:, 10, :], (32, 16))) # the state of the network has changed. We can feed the follow-up sequences: model.train_on_batch(x[:, 10:20, :], np.reshape(x[:, 20, :], (32, 16))) # let's reset the states of the LSTM layer: model.reset_states() # another way to do it in this case: model.layers[0].reset_states() ``` Note that the methods `predict`, `fit`, `train_on_batch`, `predict_classes`, etc. will *all* update the states of the stateful layers in a model. This allows you to do not only stateful training, but also stateful prediction. --- ### How can I remove a layer from a Sequential model? You can remove the last added layer in a Sequential model by calling `.pop()`: ```python model = Sequential() model.add(Dense(32, activation='relu', input_dim=784)) model.add(Dense(32, activation='relu')) print(len(model.layers)) # "2" model.pop() print(len(model.layers)) # "1" ``` --- ### How can I use pre-trained models in Keras? 
Code and pre-trained weights are available for the following image classification models: - Xception - VGG16 - VGG19 - ResNet50 - Inception v3 - Inception-ResNet v2 - MobileNet v1 They can be imported from the module `keras.applications`: ```python from keras.applications.xception import Xception from keras.applications.vgg16 import VGG16 from keras.applications.vgg19 import VGG19 from keras.applications.resnet50 import ResNet50 from keras.applications.inception_v3 import InceptionV3 from keras.applications.inception_resnet_v2 import InceptionResNetV2 from keras.applications.mobilenet import MobileNet model = VGG16(weights='imagenet', include_top=True) ``` For a few simple usage examples, see [the documentation for the Applications module](/applications). For a detailed example of how to use such a pre-trained model for feature extraction or for fine-tuning, see [this blog post](http://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html). The VGG16 model is also the basis for several Keras example scripts: - [Style transfer](https://github.com/keras-team/keras/blob/master/examples/neural_style_transfer.py) - [Feature visualization](https://github.com/keras-team/keras/blob/master/examples/conv_filter_visualization.py) - [Deep dream](https://github.com/keras-team/keras/blob/master/examples/deep_dream.py) --- ### How can I use HDF5 inputs with Keras? You can use the `HDF5Matrix` class from `keras.utils.io_utils`. See [the HDF5Matrix documentation](/utils/#hdf5matrix) for details. You can also directly use a HDF5 dataset: ```python import h5py with h5py.File('input/file.hdf5', 'r') as f: x_data = f['x_data'] model.predict(x_data) ``` Please also see [How can I install HDF5 or h5py to save my models in Keras?](#how-can-i-install-hdf5-or-h5py-to-save-my-models-in-keras) for instructions on how to install `h5py`. --- ### Where is the Keras configuration file stored? 
The default directory where all Keras data is stored is: ```bash $HOME/.keras/ ``` Note that Windows users should replace `$HOME` with `%USERPROFILE%`. In case Keras cannot create the above directory (e.g. due to permission issues), `/tmp/.keras/` is used as a backup. The Keras configuration file is a JSON file stored at `$HOME/.keras/keras.json`. The default configuration file looks like this: ``` { "image_data_format": "channels_last", "epsilon": 1e-07, "floatx": "float32", "backend": "tensorflow" } ``` It contains the following fields: - The image data format to be used as default by image processing layers and utilities (either `channels_last` or `channels_first`). - The `epsilon` numerical fuzz factor to be used to prevent division by zero in some operations. - The default float data type. - The default backend. See the [backend documentation](/backend). Likewise, cached dataset files, such as those downloaded with [`get_file()`](/utils/#get_file), are stored by default in `$HOME/.keras/datasets/`. --- ### How can I obtain reproducible results using Keras during development? During development of a model, sometimes it is useful to be able to obtain reproducible results from run to run in order to determine if a change in performance is due to an actual model or data modification, or merely a result of a new random sample. First, you need to set the `PYTHONHASHSEED` environment variable to `0` before the program starts (not within the program itself). This is necessary in Python 3.2.3 onwards to have reproducible behavior for certain hash-based operations (e.g., the item order in a set or a dict, see [Python's documentation](https://docs.python.org/3.7/using/cmdline.html#envvar-PYTHONHASHSEED) or [issue #2280](https://github.com/keras-team/keras/issues/2280#issuecomment-306959926) for further details). 
One way to set the environment variable is when starting python like this: ``` $ cat test_hash.py print(hash("keras")) $ python3 test_hash.py # non-reproducible hash (Python 3.2.3+) -8127205062320133199 $ python3 test_hash.py # non-reproducible hash (Python 3.2.3+) 3204480642156461591 $ PYTHONHASHSEED=0 python3 test_hash.py # reproducible hash 4883664951434749476 $ PYTHONHASHSEED=0 python3 test_hash.py # reproducible hash 4883664951434749476 ``` Moreover, when using the TensorFlow backend and running on a GPU, some operations have non-deterministic outputs, in particular `tf.reduce_sum()`. This is due to the fact that GPUs run many operations in parallel, so the order of execution is not always guaranteed. Due to the limited precision of floats, even adding several numbers together may give slightly different results depending on the order in which you add them. You can try to avoid the non-deterministic operations, but some may be created automatically by TensorFlow to compute the gradients, so it is much simpler to just run the code on the CPU. For this, you can set the `CUDA_VISIBLE_DEVICES` environment variable to an empty string, for example: ``` $ CUDA_VISIBLE_DEVICES="" PYTHONHASHSEED=0 python your_program.py ``` The below snippet of code provides an example of how to obtain reproducible results - this is geared towards a TensorFlow backend for a Python 3 environment: ```python import numpy as np import tensorflow as tf import random as rn # The below is necessary for starting Numpy generated random numbers # in a well-defined initial state. np.random.seed(42) # The below is necessary for starting core Python generated random numbers # in a well-defined state. rn.seed(12345) # Force TensorFlow to use single thread. # Multiple threads are a potential source of non-reproducible results. 
# For further details, see: https://stackoverflow.com/questions/42022950/ session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1) from keras import backend as K # The below tf.set_random_seed() will make random number generation # in the TensorFlow backend have a well-defined initial state. # For further details, see: # https://www.tensorflow.org/api_docs/python/tf/set_random_seed tf.set_random_seed(1234) sess = tf.Session(graph=tf.get_default_graph(), config=session_conf) K.set_session(sess) # Rest of code follows ... ``` --- ### How can I install HDF5 or h5py to save my models in Keras? In order to save your Keras models as HDF5 files, e.g. via `keras.callbacks.ModelCheckpoint`, Keras uses the h5py Python package. It is a dependency of Keras and should be installed by default. On Debian-based distributions, you will have to additionally install `libhdf5`: ``` sudo apt-get install libhdf5-serial-dev ``` If you are unsure if h5py is installed you can open a Python shell and load the module via ``` import h5py ``` If it imports without error it is installed otherwise you can find detailed installation instructions here: http://docs.h5py.org/en/latest/build.html Keras-2.2.4/docs/templates/getting-started/sequential-model-guide.md0000644000000000116100000003214713312516314025247 0ustar rooteng00000000000000# Getting started with the Keras Sequential model The `Sequential` model is a linear stack of layers. 
You can create a `Sequential` model by passing a list of layer instances to the constructor: ```python from keras.models import Sequential from keras.layers import Dense, Activation model = Sequential([ Dense(32, input_shape=(784,)), Activation('relu'), Dense(10), Activation('softmax'), ]) ``` You can also simply add layers via the `.add()` method: ```python model = Sequential() model.add(Dense(32, input_dim=784)) model.add(Activation('relu')) ``` ---- ## Specifying the input shape The model needs to know what input shape it should expect. For this reason, the first layer in a `Sequential` model (and only the first, because following layers can do automatic shape inference) needs to receive information about its input shape. There are several possible ways to do this: - Pass an `input_shape` argument to the first layer. This is a shape tuple (a tuple of integers or `None` entries, where `None` indicates that any positive integer may be expected). In `input_shape`, the batch dimension is not included. - Some 2D layers, such as `Dense`, support the specification of their input shape via the argument `input_dim`, and some 3D temporal layers support the arguments `input_dim` and `input_length`. - If you ever need to specify a fixed batch size for your inputs (this is useful for stateful recurrent networks), you can pass a `batch_size` argument to a layer. If you pass both `batch_size=32` and `input_shape=(6, 8)` to a layer, it will then expect every batch of inputs to have the batch shape `(32, 6, 8)`. As such, the following snippets are strictly equivalent: ```python model = Sequential() model.add(Dense(32, input_shape=(784,))) ``` ```python model = Sequential() model.add(Dense(32, input_dim=784)) ``` ---- ## Compilation Before training a model, you need to configure the learning process, which is done via the `compile` method. It receives three arguments: - An optimizer. 
This could be the string identifier of an existing optimizer (such as `rmsprop` or `adagrad`), or an instance of the `Optimizer` class. See: [optimizers](/optimizers). - A loss function. This is the objective that the model will try to minimize. It can be the string identifier of an existing loss function (such as `categorical_crossentropy` or `mse`), or it can be an objective function. See: [losses](/losses). - A list of metrics. For any classification problem you will want to set this to `metrics=['accuracy']`. A metric could be the string identifier of an existing metric or a custom metric function. ```python # For a multi-class classification problem model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) # For a binary classification problem model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy']) # For a mean squared error regression problem model.compile(optimizer='rmsprop', loss='mse') # For custom metrics import keras.backend as K def mean_pred(y_true, y_pred): return K.mean(y_pred) model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy', mean_pred]) ``` ---- ## Training Keras models are trained on Numpy arrays of input data and labels. For training a model, you will typically use the `fit` function. [Read its documentation here](/models/sequential). 
```python # For a single-input model with 2 classes (binary classification): model = Sequential() model.add(Dense(32, activation='relu', input_dim=100)) model.add(Dense(1, activation='sigmoid')) model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy']) # Generate dummy data import numpy as np data = np.random.random((1000, 100)) labels = np.random.randint(2, size=(1000, 1)) # Train the model, iterating on the data in batches of 32 samples model.fit(data, labels, epochs=10, batch_size=32) ``` ```python # For a single-input model with 10 classes (categorical classification): model = Sequential() model.add(Dense(32, activation='relu', input_dim=100)) model.add(Dense(10, activation='softmax')) model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) # Generate dummy data import numpy as np data = np.random.random((1000, 100)) labels = np.random.randint(10, size=(1000, 1)) # Convert labels to categorical one-hot encoding one_hot_labels = keras.utils.to_categorical(labels, num_classes=10) # Train the model, iterating on the data in batches of 32 samples model.fit(data, one_hot_labels, epochs=10, batch_size=32) ``` ---- ## Examples Here are a few examples to get you started! In the [examples folder](https://github.com/keras-team/keras/tree/master/examples), you will also find example models for real datasets: - CIFAR10 small images classification: Convolutional Neural Network (CNN) with realtime data augmentation - IMDB movie review sentiment classification: LSTM over sequences of words - Reuters newswires topic classification: Multilayer Perceptron (MLP) - MNIST handwritten digits classification: MLP & CNN - Character-level text generation with LSTM ...and more. 
### Multilayer Perceptron (MLP) for multi-class softmax classification: ```python import keras from keras.models import Sequential from keras.layers import Dense, Dropout, Activation from keras.optimizers import SGD # Generate dummy data import numpy as np x_train = np.random.random((1000, 20)) y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), num_classes=10) x_test = np.random.random((100, 20)) y_test = keras.utils.to_categorical(np.random.randint(10, size=(100, 1)), num_classes=10) model = Sequential() # Dense(64) is a fully-connected layer with 64 hidden units. # in the first layer, you must specify the expected input data shape: # here, 20-dimensional vectors. model.add(Dense(64, activation='relu', input_dim=20)) model.add(Dropout(0.5)) model.add(Dense(64, activation='relu')) model.add(Dropout(0.5)) model.add(Dense(10, activation='softmax')) sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) model.fit(x_train, y_train, epochs=20, batch_size=128) score = model.evaluate(x_test, y_test, batch_size=128) ``` ### MLP for binary classification: ```python import numpy as np from keras.models import Sequential from keras.layers import Dense, Dropout # Generate dummy data x_train = np.random.random((1000, 20)) y_train = np.random.randint(2, size=(1000, 1)) x_test = np.random.random((100, 20)) y_test = np.random.randint(2, size=(100, 1)) model = Sequential() model.add(Dense(64, input_dim=20, activation='relu')) model.add(Dropout(0.5)) model.add(Dense(64, activation='relu')) model.add(Dropout(0.5)) model.add(Dense(1, activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy']) model.fit(x_train, y_train, epochs=20, batch_size=128) score = model.evaluate(x_test, y_test, batch_size=128) ``` ### VGG-like convnet: ```python import numpy as np import keras from keras.models import Sequential from keras.layers 
import Dense, Dropout, Flatten from keras.layers import Conv2D, MaxPooling2D from keras.optimizers import SGD # Generate dummy data x_train = np.random.random((100, 100, 100, 3)) y_train = keras.utils.to_categorical(np.random.randint(10, size=(100, 1)), num_classes=10) x_test = np.random.random((20, 100, 100, 3)) y_test = keras.utils.to_categorical(np.random.randint(10, size=(20, 1)), num_classes=10) model = Sequential() # input: 100x100 images with 3 channels -> (100, 100, 3) tensors. # this applies 32 convolution filters of size 3x3 each. model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3))) model.add(Conv2D(32, (3, 3), activation='relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Conv2D(64, (3, 3), activation='relu')) model.add(Conv2D(64, (3, 3), activation='relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(256, activation='relu')) model.add(Dropout(0.5)) model.add(Dense(10, activation='softmax')) sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=sgd) model.fit(x_train, y_train, batch_size=32, epochs=10) score = model.evaluate(x_test, y_test, batch_size=32) ``` ### Sequence classification with LSTM: ```python from keras.models import Sequential from keras.layers import Dense, Dropout from keras.layers import Embedding from keras.layers import LSTM max_features = 1024 model = Sequential() model.add(Embedding(max_features, output_dim=256)) model.add(LSTM(128)) model.add(Dropout(0.5)) model.add(Dense(1, activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy']) model.fit(x_train, y_train, batch_size=16, epochs=10) score = model.evaluate(x_test, y_test, batch_size=16) ``` ### Sequence classification with 1D convolutions: ```python from keras.models import Sequential from keras.layers import Dense, Dropout from keras.layers import 
Embedding from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D seq_length = 64 model = Sequential() model.add(Conv1D(64, 3, activation='relu', input_shape=(seq_length, 100))) model.add(Conv1D(64, 3, activation='relu')) model.add(MaxPooling1D(3)) model.add(Conv1D(128, 3, activation='relu')) model.add(Conv1D(128, 3, activation='relu')) model.add(GlobalAveragePooling1D()) model.add(Dropout(0.5)) model.add(Dense(1, activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy']) model.fit(x_train, y_train, batch_size=16, epochs=10) score = model.evaluate(x_test, y_test, batch_size=16) ``` ### Stacked LSTM for sequence classification In this model, we stack 3 LSTM layers on top of each other, making the model capable of learning higher-level temporal representations. The first two LSTMs return their full output sequences, but the last one only returns the last step in its output sequence, thus dropping the temporal dimension (i.e. converting the input sequence into a single vector). 
stacked LSTM ```python from keras.models import Sequential from keras.layers import LSTM, Dense import numpy as np data_dim = 16 timesteps = 8 num_classes = 10 # expected input data shape: (batch_size, timesteps, data_dim) model = Sequential() model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, data_dim))) # returns a sequence of vectors of dimension 32 model.add(LSTM(32, return_sequences=True)) # returns a sequence of vectors of dimension 32 model.add(LSTM(32)) # return a single vector of dimension 32 model.add(Dense(10, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) # Generate dummy training data x_train = np.random.random((1000, timesteps, data_dim)) y_train = np.random.random((1000, num_classes)) # Generate dummy validation data x_val = np.random.random((100, timesteps, data_dim)) y_val = np.random.random((100, num_classes)) model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val)) ``` ### Same stacked LSTM model, rendered "stateful" A stateful recurrent model is one for which the internal states (memories) obtained after processing a batch of samples are reused as initial states for the samples of the next batch. This allows processing longer sequences while keeping computational complexity manageable. [You can read more about stateful RNNs in the FAQ.](/getting-started/faq/#how-can-i-use-stateful-rnns) ```python from keras.models import Sequential from keras.layers import LSTM, Dense import numpy as np data_dim = 16 timesteps = 8 num_classes = 10 batch_size = 32 # Expected input batch shape: (batch_size, timesteps, data_dim) # Note that we have to provide the full batch_input_shape since the network is stateful. # the sample of index i in batch k is the follow-up for the sample i in batch k-1.
model = Sequential() model.add(LSTM(32, return_sequences=True, stateful=True, batch_input_shape=(batch_size, timesteps, data_dim))) model.add(LSTM(32, return_sequences=True, stateful=True)) model.add(LSTM(32, stateful=True)) model.add(Dense(10, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) # Generate dummy training data x_train = np.random.random((batch_size * 10, timesteps, data_dim)) y_train = np.random.random((batch_size * 10, num_classes)) # Generate dummy validation data x_val = np.random.random((batch_size * 3, timesteps, data_dim)) y_val = np.random.random((batch_size * 3, num_classes)) model.fit(x_train, y_train, batch_size=batch_size, epochs=5, shuffle=False, validation_data=(x_val, y_val)) ``` Keras-2.2.4/docs/templates/index.md0000644000000000116100000000025113202372135016674 0ustar rooteng00000000000000# Keras: The Python Deep Learning library {{autogenerated}}Keras-2.2.4/docs/templates/metrics.md0000644000000000116100000000272013227311000017225 0ustar rooteng00000000000000 ## Usage of metrics A metric is a function that is used to judge the performance of your model. Metric functions are to be supplied in the `metrics` parameter when a model is compiled. ```python model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['mae', 'acc']) ``` ```python from keras import metrics model.compile(loss='mean_squared_error', optimizer='sgd', metrics=[metrics.mae, metrics.categorical_accuracy]) ``` A metric function is similar to a [loss function](/losses), except that the results from evaluating a metric are not used when training the model. You can either pass the name of an existing metric, or pass a Theano/TensorFlow symbolic function (see [Custom metrics](#custom-metrics)). #### Arguments - __y_true__: True labels. Theano/TensorFlow tensor. - __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true. 
#### Returns Single tensor value representing the mean of the output array across all datapoints. ---- ## Available metrics {{autogenerated}} ---- ## Custom metrics Custom metrics can be passed at the compilation step. The function would need to take `(y_true, y_pred)` as arguments and return a single tensor value. ```python import keras.backend as K def mean_pred(y_true, y_pred): return K.mean(y_pred) model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy', mean_pred]) ``` Keras-2.2.4/docs/templates/preprocessing/0000755000000000116100000000000013355226624020142 5ustar rooteng00000000000000Keras-2.2.4/docs/templates/preprocessing/text.md0000644000000000116100000000005313254254664021451 0ustar rooteng00000000000000 ### Text Preprocessing {{autogenerated}} Keras-2.2.4/docs/templates/preprocessing/image.md0000644000000000116100000000005213267437307021547 0ustar rooteng00000000000000 # Image Preprocessing {{autogenerated}} Keras-2.2.4/docs/templates/datasets.md0000644000000000116100000001710513354530144017407 0ustar rooteng00000000000000# Datasets ## CIFAR10 small image classification Dataset of 50,000 32x32 color training images, labeled over 10 categories, and 10,000 test images. ### Usage: ```python from keras.datasets import cifar10 (x_train, y_train), (x_test, y_test) = cifar10.load_data() ``` - __Returns:__ - 2 tuples: - __x_train, x_test__: uint8 array of RGB image data with shape (num_samples, 3, 32, 32) or (num_samples, 32, 32, 3) based on the `image_data_format` backend setting of either `channels_first` or `channels_last` respectively. - __y_train, y_test__: uint8 array of category labels (integers in range 0-9) with shape (num_samples,). --- ## CIFAR100 small image classification Dataset of 50,000 32x32 color training images, labeled over 100 categories, and 10,000 test images. 
### Usage: ```python from keras.datasets import cifar100 (x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine') ``` - __Returns:__ - 2 tuples: - __x_train, x_test__: uint8 array of RGB image data with shape (num_samples, 3, 32, 32) or (num_samples, 32, 32, 3) based on the `image_data_format` backend setting of either `channels_first` or `channels_last` respectively. - __y_train, y_test__: uint8 array of category labels with shape (num_samples,). - __Arguments:__ - __label_mode__: "fine" or "coarse". --- ## IMDB Movie reviews sentiment classification Dataset of 25,000 movie reviews from IMDB, labeled by sentiment (positive/negative). Reviews have been preprocessed, and each review is encoded as a [sequence](preprocessing/sequence.md) of word indexes (integers). For convenience, words are indexed by overall frequency in the dataset, so that for instance the integer "3" encodes the 3rd most frequent word in the data. This allows for quick filtering operations such as: "only consider the top 10,000 most common words, but eliminate the top 20 most common words". As a convention, "0" does not stand for a specific word, but instead is used to encode any unknown word. ### Usage: ```python from keras.datasets import imdb (x_train, y_train), (x_test, y_test) = imdb.load_data(path="imdb.npz", num_words=None, skip_top=0, maxlen=None, seed=113, start_char=1, oov_char=2, index_from=3) ``` - __Returns:__ - 2 tuples: - __x_train, x_test__: list of sequences, which are lists of indexes (integers). If the num_words argument was specified, the maximum possible index value is num_words-1. If the maxlen argument was specified, the largest possible sequence length is maxlen. - __y_train, y_test__: list of integer labels (1 or 0). - __Arguments:__ - __path__: if you do not have the data locally (at `'~/.keras/datasets/' + path`), it will be downloaded to this location. - __num_words__: integer or None. Top most frequent words to consider.
Any less frequent word will appear as `oov_char` value in the sequence data. - __skip_top__: integer. Top most frequent words to ignore (they will appear as `oov_char` value in the sequence data). - __maxlen__: int. Maximum sequence length. Any longer sequence will be filtered out. - __seed__: int. Seed for reproducible data shuffling. - __start_char__: int. The start of a sequence will be marked with this character. Set to 1 because 0 is usually the padding character. - __oov_char__: int. Words that were cut out because of the `num_words` or `skip_top` limit will be replaced with this character. - __index_from__: int. Index actual words with this index and higher. --- ## Reuters newswire topics classification Dataset of 11,228 newswires from Reuters, labeled over 46 topics. As with the IMDB dataset, each wire is encoded as a sequence of word indexes (same conventions). ### Usage: ```python from keras.datasets import reuters (x_train, y_train), (x_test, y_test) = reuters.load_data(path="reuters.npz", num_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, index_from=3) ``` The specifications are the same as those of the IMDB dataset, with the addition of: - __test_split__: float. Fraction of the dataset to be used as test data. This dataset also makes available the word index used for encoding the sequences: ```python word_index = reuters.get_word_index(path="reuters_word_index.json") ``` - __Returns:__ A dictionary where keys are words (str) and values are indexes (integer). E.g. `word_index["giraffe"]` might return `1234`. - __Arguments:__ - __path__: if you do not have the index file locally (at `'~/.keras/datasets/' + path`), it will be downloaded to this location. --- ## MNIST database of handwritten digits Dataset of 60,000 28x28 grayscale images of the 10 digits, along with a test set of 10,000 images.
### Usage: ```python from keras.datasets import mnist (x_train, y_train), (x_test, y_test) = mnist.load_data() ``` - __Returns:__ - 2 tuples: - __x_train, x_test__: uint8 array of grayscale image data with shape (num_samples, 28, 28). - __y_train, y_test__: uint8 array of digit labels (integers in range 0-9) with shape (num_samples,). - __Arguments:__ - __path__: if you do not have the data file locally (at `'~/.keras/datasets/' + path`), it will be downloaded to this location. --- ## Fashion-MNIST database of fashion articles Dataset of 60,000 28x28 grayscale images of 10 fashion categories, along with a test set of 10,000 images. This dataset can be used as a drop-in replacement for MNIST. The class labels are: | Label | Description | | --- | --- | | 0 | T-shirt/top | | 1 | Trouser | | 2 | Pullover | | 3 | Dress | | 4 | Coat | | 5 | Sandal | | 6 | Shirt | | 7 | Sneaker | | 8 | Bag | | 9 | Ankle boot | ### Usage: ```python from keras.datasets import fashion_mnist (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() ``` - __Returns:__ - 2 tuples: - __x_train, x_test__: uint8 array of grayscale image data with shape (num_samples, 28, 28). - __y_train, y_test__: uint8 array of labels (integers in range 0-9) with shape (num_samples,). --- ## Boston housing price regression dataset Dataset taken from the StatLib library which is maintained at Carnegie Mellon University. Samples contain 13 attributes of houses at different locations around the Boston suburbs in the late 1970s. Targets are the median values of the houses at a location (in k$). ### Usage: ```python from keras.datasets import boston_housing (x_train, y_train), (x_test, y_test) = boston_housing.load_data() ``` - __Arguments:__ - __path__: path where to cache the dataset locally (relative to ~/.keras/datasets). - __seed__: Random seed for shuffling the data before computing the test split. - __test_split__: fraction of the data to reserve as test set.
- __Returns:__ Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. Keras-2.2.4/docs/templates/regularizers.md0000644000000000116100000000275313326715636020332 0ustar rooteng00000000000000## Usage of regularizers Regularizers allow to apply penalties on layer parameters or layer activity during optimization. These penalties are incorporated in the loss function that the network optimizes. The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `Conv1D`, `Conv2D` and `Conv3D` have a unified API. These layers expose 3 keyword arguments: - `kernel_regularizer`: instance of `keras.regularizers.Regularizer` - `bias_regularizer`: instance of `keras.regularizers.Regularizer` - `activity_regularizer`: instance of `keras.regularizers.Regularizer` ## Example ```python from keras import regularizers model.add(Dense(64, input_dim=64, kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l1(0.01))) ``` ## Available penalties ```python keras.regularizers.l1(0.) keras.regularizers.l2(0.) keras.regularizers.l1_l2(l1=0.01, l2=0.01) ``` ## Developing new regularizers Any function that takes in a weight matrix and returns a loss contribution tensor can be used as a regularizer, e.g.: ```python from keras import backend as K def l1_reg(weight_matrix): return 0.01 * K.sum(K.abs(weight_matrix)) model.add(Dense(64, input_dim=64, kernel_regularizer=l1_reg)) ``` Alternatively, you can write your regularizers in an object-oriented way; see the [keras/regularizers.py](https://github.com/keras-team/keras/blob/master/keras/regularizers.py) module for examples. Keras-2.2.4/docs/templates/visualization.md0000644000000000116100000000342113354530144020474 0ustar rooteng00000000000000 ## Model visualization The `keras.utils.vis_utils` module provides utility functions to plot a Keras model (using `graphviz`). 
This will plot a graph of the model and save it to a file: ```python from keras.utils import plot_model plot_model(model, to_file='model.png') ``` `plot_model` takes two optional arguments: - `show_shapes` (defaults to False) controls whether output shapes are shown in the graph. - `show_layer_names` (defaults to True) controls whether layer names are shown in the graph. You can also directly obtain the `pydot.Graph` object and render it yourself, for example to show it in an ipython notebook : ```python from IPython.display import SVG from keras.utils.vis_utils import model_to_dot SVG(model_to_dot(model).create(prog='dot', format='svg')) ``` ## Training history visualization The `fit()` method on a Keras `Model` returns a `History` object. The `History.history` attribute is a dictionary recording training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). Here is a simple example using `matplotlib` to generate loss & accuracy plots for training & validation: ```python import matplotlib.pyplot as plt history = model.fit(x, y, validation_split=0.25, epochs=50, batch_size=16, verbose=1) # Plot training & validation accuracy values plt.plot(history.history['acc']) plt.plot(history.history['val_acc']) plt.title('Model accuracy') plt.ylabel('Accuracy') plt.xlabel('Epoch') plt.legend(['Train', 'Test'], loc='upper left') plt.show() # Plot training & validation loss values plt.plot(history.history['loss']) plt.plot(history.history['val_loss']) plt.title('Model loss') plt.ylabel('Loss') plt.xlabel('Epoch') plt.legend(['Train', 'Test'], loc='upper left') plt.show() ``` Keras-2.2.4/docs/templates/activations.md0000644000000000116100000000162113247612467020131 0ustar rooteng00000000000000 ## Usage of activations Activations can either be used through an `Activation` layer, or through the `activation` argument supported by all forward layers: ```python from keras.layers import Activation, Dense 
model.add(Dense(64)) model.add(Activation('tanh')) ``` This is equivalent to: ```python model.add(Dense(64, activation='tanh')) ``` You can also pass an element-wise TensorFlow/Theano/CNTK function as an activation: ```python from keras import backend as K model.add(Dense(64, activation=K.tanh)) ``` ## Available activations {{autogenerated}} ## On "Advanced Activations" Activations that are more complex than a simple TensorFlow/Theano/CNTK function (eg. learnable activations, which maintain a state) are available as [Advanced Activation layers](layers/advanced-activations.md), and can be found in the module `keras.layers.advanced_activations`. These include `PReLU` and `LeakyReLU`. Keras-2.2.4/docs/templates/applications.md0000644000000000116100000007244613354530144020276 0ustar rooteng00000000000000# Applications Keras Applications are deep learning models that are made available alongside pre-trained weights. These models can be used for prediction, feature extraction, and fine-tuning. Weights are downloaded automatically when instantiating a model. They are stored at `~/.keras/models/`. ## Available models ### Models for image classification with weights trained on ImageNet: - [Xception](#xception) - [VGG16](#vgg16) - [VGG19](#vgg19) - [ResNet50](#resnet50) - [InceptionV3](#inceptionv3) - [InceptionResNetV2](#inceptionresnetv2) - [MobileNet](#mobilenet) - [DenseNet](#densenet) - [NASNet](#nasnet) - [MobileNetV2](#mobilenetv2) All of these architectures are compatible with all the backends (TensorFlow, Theano, and CNTK), and upon instantiation the models will be built according to the image data format set in your Keras configuration file at `~/.keras/keras.json`. For instance, if you have set `image_data_format=channels_last`, then any model loaded from this repository will get built according to the TensorFlow data format convention, "Height-Width-Depth". 
Note that: - For `Keras < 2.2.0`, The Xception model is only available for TensorFlow, due to its reliance on `SeparableConvolution` layers. - For `Keras < 2.1.5`, The MobileNet model is only available for TensorFlow, due to its reliance on `DepthwiseConvolution` layers. ----- ## Usage examples for image classification models ### Classify ImageNet classes with ResNet50 ```python from keras.applications.resnet50 import ResNet50 from keras.preprocessing import image from keras.applications.resnet50 import preprocess_input, decode_predictions import numpy as np model = ResNet50(weights='imagenet') img_path = 'elephant.jpg' img = image.load_img(img_path, target_size=(224, 224)) x = image.img_to_array(img) x = np.expand_dims(x, axis=0) x = preprocess_input(x) preds = model.predict(x) # decode the results into a list of tuples (class, description, probability) # (one such list for each sample in the batch) print('Predicted:', decode_predictions(preds, top=3)[0]) # Predicted: [(u'n02504013', u'Indian_elephant', 0.82658225), (u'n01871265', u'tusker', 0.1122357), (u'n02504458', u'African_elephant', 0.061040461)] ``` ### Extract features with VGG16 ```python from keras.applications.vgg16 import VGG16 from keras.preprocessing import image from keras.applications.vgg16 import preprocess_input import numpy as np model = VGG16(weights='imagenet', include_top=False) img_path = 'elephant.jpg' img = image.load_img(img_path, target_size=(224, 224)) x = image.img_to_array(img) x = np.expand_dims(x, axis=0) x = preprocess_input(x) features = model.predict(x) ``` ### Extract features from an arbitrary intermediate layer with VGG19 ```python from keras.applications.vgg19 import VGG19 from keras.preprocessing import image from keras.applications.vgg19 import preprocess_input from keras.models import Model import numpy as np base_model = VGG19(weights='imagenet') model = Model(inputs=base_model.input, outputs=base_model.get_layer('block4_pool').output) img_path = 'elephant.jpg' img = 
image.load_img(img_path, target_size=(224, 224)) x = image.img_to_array(img) x = np.expand_dims(x, axis=0) x = preprocess_input(x) block4_pool_features = model.predict(x) ``` ### Fine-tune InceptionV3 on a new set of classes ```python from keras.applications.inception_v3 import InceptionV3 from keras.preprocessing import image from keras.models import Model from keras.layers import Dense, GlobalAveragePooling2D from keras import backend as K # create the base pre-trained model base_model = InceptionV3(weights='imagenet', include_top=False) # add a global spatial average pooling layer x = base_model.output x = GlobalAveragePooling2D()(x) # let's add a fully-connected layer x = Dense(1024, activation='relu')(x) # and a logistic layer -- let's say we have 200 classes predictions = Dense(200, activation='softmax')(x) # this is the model we will train model = Model(inputs=base_model.input, outputs=predictions) # first: train only the top layers (which were randomly initialized) # i.e. freeze all convolutional InceptionV3 layers for layer in base_model.layers: layer.trainable = False # compile the model (should be done *after* setting layers to non-trainable) model.compile(optimizer='rmsprop', loss='categorical_crossentropy') # train the model on the new data for a few epochs model.fit_generator(...) # at this point, the top layers are well trained and we can start fine-tuning # convolutional layers from inception V3. We will freeze the bottom N layers # and train the remaining top layers. # let's visualize layer names and layer indices to see how many layers # we should freeze: for i, layer in enumerate(base_model.layers): print(i, layer.name) # we chose to train the top 2 inception blocks, i.e. 
we will freeze # the first 249 layers and unfreeze the rest: for layer in model.layers[:249]: layer.trainable = False for layer in model.layers[249:]: layer.trainable = True # we need to recompile the model for these modifications to take effect # we use SGD with a low learning rate from keras.optimizers import SGD model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy') # we train our model again (this time fine-tuning the top 2 inception blocks # alongside the top Dense layers) model.fit_generator(...) ``` ### Build InceptionV3 over a custom input tensor ```python from keras.applications.inception_v3 import InceptionV3 from keras.layers import Input # this could also be the output of a different Keras model or layer input_tensor = Input(shape=(224, 224, 3)) # this assumes K.image_data_format() == 'channels_last' model = InceptionV3(input_tensor=input_tensor, weights='imagenet', include_top=True) ``` ----- # Documentation for individual models | Model | Size | Top-1 Accuracy | Top-5 Accuracy | Parameters | Depth | | ----- | ----: | --------------: | --------------: | ----------: | -----: | | [Xception](#xception) | 88 MB | 0.790 | 0.945 | 22,910,480 | 126 | | [VGG16](#vgg16) | 528 MB | 0.713 | 0.901 | 138,357,544 | 23 | | [VGG19](#vgg19) | 549 MB | 0.713 | 0.900 | 143,667,240 | 26 | | [ResNet50](#resnet50) | 99 MB | 0.749 | 0.921 | 25,636,712 | 168 | | [InceptionV3](#inceptionv3) | 92 MB | 0.779 | 0.937 | 23,851,784 | 159 | | [InceptionResNetV2](#inceptionresnetv2) | 215 MB | 0.803 | 0.953 | 55,873,736 | 572 | | [MobileNet](#mobilenet) | 16 MB | 0.704 | 0.895 | 4,253,864 | 88 | | [MobileNetV2](#mobilenetv2) | 14 MB | 0.713 | 0.901 | 3,538,984 | 88 | | [DenseNet121](#densenet) | 33 MB | 0.750 | 0.923 | 8,062,504 | 121 | | [DenseNet169](#densenet) | 57 MB | 0.762 | 0.932 | 14,307,880 | 169 | | [DenseNet201](#densenet) | 80 MB | 0.773 | 0.936 | 20,242,984 | 201 | | [NASNetMobile](#nasnet) | 23 MB | 0.744 | 0.919 | 5,326,716 | - | |
[NASNetLarge](#nasnet) | 343 MB | 0.825 | 0.960 | 88,949,818 | - | The top-1 and top-5 accuracies refer to the model's performance on the ImageNet validation dataset. ----- ## Xception ```python keras.applications.xception.Xception(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000) ``` Xception V1 model, with weights pre-trained on ImageNet. On ImageNet, this model gets to a top-1 validation accuracy of 0.790 and a top-5 validation accuracy of 0.945. Note that this model only supports the data format `'channels_last'` (height, width, channels). The default input size for this model is 299x299. ### Arguments - include_top: whether to include the fully-connected layer at the top of the network. - weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. - input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(299, 299, 3)`). It should have exactly 3 input channels, and width and height should be no smaller than 71. E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `'avg'` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. ### Returns A Keras `Model` instance.
### References - [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357) ### License These weights are trained by ourselves and are released under the MIT license. ----- ## VGG16 ```python keras.applications.vgg16.VGG16(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000) ``` VGG16 model, with weights pre-trained on ImageNet. This model can be built both with `'channels_first'` data format (channels, height, width) or `'channels_last'` data format (height, width, channels). The default input size for this model is 224x224. ### Arguments - include_top: whether to include the 3 fully-connected layers at the top of the network. - weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. - input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(224, 224, 3)` (with `'channels_last'` data format) or `(3, 224, 224)` (with `'channels_first'` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `'avg'` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. ### Returns A Keras `Model` instance. 
### References - [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556): please cite this paper if you use the VGG models in your work. ### License These weights are ported from the ones [released by VGG at Oxford](http://www.robots.ox.ac.uk/~vgg/research/very_deep/) under the [Creative Commons Attribution License](https://creativecommons.org/licenses/by/4.0/). ----- ## VGG19 ```python keras.applications.vgg19.VGG19(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000) ``` VGG19 model, with weights pre-trained on ImageNet. This model can be built both with `'channels_first'` data format (channels, height, width) or `'channels_last'` data format (height, width, channels). The default input size for this model is 224x224. ### Arguments - include_top: whether to include the 3 fully-connected layers at the top of the network. - weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. - input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(224, 224, 3)` (with `'channels_last'` data format) or `(3, 224, 224)` (with `'channels_first'` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `'avg'` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. 
- classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. ### Returns A Keras `Model` instance. ### References - [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) ### License These weights are ported from the ones [released by VGG at Oxford](http://www.robots.ox.ac.uk/~vgg/research/very_deep/) under the [Creative Commons Attribution License](https://creativecommons.org/licenses/by/4.0/). ----- ## ResNet50 ```python keras.applications.resnet50.ResNet50(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000) ``` ResNet50 model, with weights pre-trained on ImageNet. This model can be built both with `'channels_first'` data format (channels, height, width) or `'channels_last'` data format (height, width, channels). The default input size for this model is 224x224. ### Arguments - include_top: whether to include the fully-connected layer at the top of the network. - weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. - input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(224, 224, 3)` (with `'channels_last'` data format) or `(3, 224, 224)` (with `'channels_first'` data format)). It should have exactly 3 input channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer.
- `'avg'` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. ### Returns A Keras `Model` instance. ### References - [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) ### License These weights are ported from the ones [released by Kaiming He](https://github.com/KaimingHe/deep-residual-networks) under the [MIT license](https://github.com/KaimingHe/deep-residual-networks/blob/master/LICENSE). ----- ## InceptionV3 ```python keras.applications.inception_v3.InceptionV3(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000) ``` Inception V3 model, with weights pre-trained on ImageNet. This model can be built both with `'channels_first'` data format (channels, height, width) or `'channels_last'` data format (height, width, channels). The default input size for this model is 299x299. ### Arguments - include_top: whether to include the fully-connected layer at the top of the network. - weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. - input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(299, 299, 3)` (with `'channels_last'` data format) or `(3, 299, 299)` (with `'channels_first'` data format)). It should have exactly 3 input channels, and width and height should be no smaller than 75. E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction when `include_top` is `False`.
- `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `'avg'` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. ### Returns A Keras `Model` instance. ### References - [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) ### License These weights are released under [the Apache License](https://github.com/tensorflow/models/blob/master/LICENSE). ----- ## InceptionResNetV2 ```python keras.applications.inception_resnet_v2.InceptionResNetV2(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000) ``` Inception-ResNet V2 model, with weights pre-trained on ImageNet. This model can be built both with `'channels_first'` data format (channels, height, width) or `'channels_last'` data format (height, width, channels). The default input size for this model is 299x299. ### Arguments - include_top: whether to include the fully-connected layer at the top of the network. - weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. - input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(299, 299, 3)` (with `'channels_last'` data format) or `(3, 299, 299)` (with `'channels_first'` data format)). It should have exactly 3 input channels, and width and height should be no smaller than 75. E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction when `include_top` is `False`.
- `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `'avg'` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. ### Returns A Keras `Model` instance. ### References - [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261) ### License These weights are released under [the Apache License](https://github.com/tensorflow/models/blob/master/LICENSE). ----- ## MobileNet ```python keras.applications.mobilenet.MobileNet(input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000) ``` MobileNet model, with weights pre-trained on ImageNet. Note that this model only supports the data format `'channels_last'` (height, width, channels). The default input size for this model is 224x224. ### Arguments - input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(224, 224, 3)` (with `'channels_last'` data format) or `(3, 224, 224)` (with `'channels_first'` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. - alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. 
- depth_multiplier: depth multiplier for depthwise convolution (also called the resolution multiplier) - dropout: dropout rate - include_top: whether to include the fully-connected layer at the top of the network. - weights: `None` (random initialization) or `'imagenet'` (ImageNet weights) - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. - pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `'avg'` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. ### Returns A Keras `Model` instance. ### References - [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf) ### License These weights are released under [the Apache License](https://github.com/tensorflow/models/blob/master/LICENSE). ----- ## DenseNet ```python keras.applications.densenet.DenseNet121(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000) keras.applications.densenet.DenseNet169(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000) keras.applications.densenet.DenseNet201(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000) ``` DenseNet models, with weights pre-trained on ImageNet. These models can be built both with `'channels_first'` data format (channels, height, width) or `'channels_last'` data format (height, width, channels). The default input size for these models is 224x224.
### Arguments - blocks: numbers of building blocks for the four dense layers. - include_top: whether to include the fully-connected layer at the top of the network. - weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. - input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `'channels_last'` data format) or `(3, 224, 224)` (with `'channels_first'` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. - pooling: optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. - classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. ### Returns A Keras model instance. ### References - [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993) (CVPR 2017 Best Paper Award) ### License These weights are released under [the BSD 3-clause License](https://github.com/liuzhuang13/DenseNet/blob/master/LICENSE). 
----- ## NASNet ```python keras.applications.nasnet.NASNetLarge(input_shape=None, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000) keras.applications.nasnet.NASNetMobile(input_shape=None, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000) ``` Neural Architecture Search Network (NASNet) models, with weights pre-trained on ImageNet. The default input size for the NASNetLarge model is 331x331 and for the NASNetMobile model is 224x224. ### Arguments - input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(224, 224, 3)` (with `'channels_last'` data format) or `(3, 224, 224)` (with `'channels_first'` data format) for NASNetMobile or `(331, 331, 3)` (with `'channels_last'` data format) or `(3, 331, 331)` (with `'channels_first'` data format) for NASNetLarge). It should have exactly 3 input channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. - include_top: whether to include the fully-connected layer at the top of the network. - weights: `None` (random initialization) or `'imagenet'` (ImageNet weights) - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. - pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `'avg'` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. ### Returns A Keras `Model` instance.
### References - [Learning Transferable Architectures for Scalable Image Recognition](https://arxiv.org/abs/1707.07012) ### License These weights are released under [the Apache License](https://github.com/tensorflow/models/blob/master/LICENSE). ----- ## MobileNetV2 ```python keras.applications.mobilenetv2.MobileNetV2(input_shape=None, alpha=1.0, depth_multiplier=1, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000) ``` MobileNetV2 model, with weights pre-trained on ImageNet. Note that this model only supports the data format `'channels_last'` (height, width, channels). The default input size for this model is 224x224. ### Arguments - input_shape: optional shape tuple, to be specified if you would like to use a model with an input image resolution that is not (224, 224, 3). It should have exactly 3 input channels, as in `(224, 224, 3)`. You can also omit this option if you would like to infer input_shape from an input_tensor. If you choose to include both input_tensor and input_shape, then input_shape will be used if they match; if the shapes do not match, an error is raised. E.g. `(160, 160, 3)` would be one valid value. - alpha: controls the width of the network. This is known as the width multiplier in the MobileNetV2 paper. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. - depth_multiplier: depth multiplier for depthwise convolution (also called the resolution multiplier) - include_top: whether to include the fully-connected layer at the top of the network. - weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
- pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `'avg'` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. ### Returns A Keras model instance. ### Raises ValueError: in case of invalid argument for `weights`, or invalid input shape or invalid depth_multiplier, alpha, rows when weights='imagenet' ### References - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) ### License These weights are released under [the Apache License](https://github.com/tensorflow/models/blob/master/LICENSE). Keras-2.2.4/README.md0000644000000000116100000002055713305602621013607 0ustar rooteng00000000000000# Keras: Deep Learning for humans ![Keras logo](https://s3.amazonaws.com/keras.io/img/keras-logo-2018-large-1200.png) [![Build Status](https://travis-ci.org/keras-team/keras.svg?branch=master)](https://travis-ci.org/keras-team/keras) [![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/keras-team/keras/blob/master/LICENSE) ## You have just found Keras. Keras is a high-level neural networks API, written in Python and capable of running on top of [TensorFlow](https://github.com/tensorflow/tensorflow), [CNTK](https://github.com/Microsoft/cntk), or [Theano](https://github.com/Theano/Theano). It was developed with a focus on enabling fast experimentation. 
*Being able to go from idea to result with the least possible delay is key to doing good research.* Use Keras if you need a deep learning library that: - Allows for easy and fast prototyping (through user friendliness, modularity, and extensibility). - Supports both convolutional networks and recurrent networks, as well as combinations of the two. - Runs seamlessly on CPU and GPU. Read the documentation at [Keras.io](https://keras.io). Keras is compatible with: __Python 2.7-3.6__. ------------------ ## Guiding principles - __User friendliness.__ Keras is an API designed for human beings, not machines. It puts user experience front and center. Keras follows best practices for reducing cognitive load: it offers consistent & simple APIs, it minimizes the number of user actions required for common use cases, and it provides clear and actionable feedback upon user error. - __Modularity.__ A model is understood as a sequence or a graph of standalone, fully-configurable modules that can be plugged together with as few restrictions as possible. In particular, neural layers, cost functions, optimizers, initialization schemes, activation functions, regularization schemes are all standalone modules that you can combine to create new models. - __Easy extensibility.__ New modules are simple to add (as new classes and functions), and existing modules provide ample examples. To be able to easily create new modules allows for total expressiveness, making Keras suitable for advanced research. - __Work with Python__. No separate models configuration files in a declarative format. Models are described in Python code, which is compact, easier to debug, and allows for ease of extensibility. ------------------ ## Getting started: 30 seconds to Keras The core data structure of Keras is a __model__, a way to organize layers. The simplest type of model is the [`Sequential`](https://keras.io/getting-started/sequential-model-guide) model, a linear stack of layers. 
For more complex architectures, you should use the [Keras functional API](https://keras.io/getting-started/functional-api-guide), which allows you to build arbitrary graphs of layers. Here is the `Sequential` model: ```python from keras.models import Sequential model = Sequential() ``` Stacking layers is as easy as `.add()`: ```python from keras.layers import Dense model.add(Dense(units=64, activation='relu', input_dim=100)) model.add(Dense(units=10, activation='softmax')) ``` Once your model looks good, configure its learning process with `.compile()`: ```python model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) ``` If you need to, you can further configure your optimizer. A core principle of Keras is to make things reasonably simple, while allowing the user to be fully in control when they need to (the ultimate control being the easy extensibility of the source code). ```python model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)) ``` You can now iterate on your training data in batches: ```python # x_train and y_train are Numpy arrays --just like in the Scikit-Learn API. model.fit(x_train, y_train, epochs=5, batch_size=32) ``` Alternatively, you can feed batches to your model manually: ```python model.train_on_batch(x_batch, y_batch) ``` Evaluate your performance in one line: ```python loss_and_metrics = model.evaluate(x_test, y_test, batch_size=128) ``` Or generate predictions on new data: ```python classes = model.predict(x_test, batch_size=128) ``` Building a question answering system, an image classification model, a Neural Turing Machine, or any other model is just as fast. The ideas behind deep learning are simple, so why should their implementation be painful?
For a more in-depth tutorial about Keras, you can check out: - [Getting started with the Sequential model](https://keras.io/getting-started/sequential-model-guide) - [Getting started with the functional API](https://keras.io/getting-started/functional-api-guide) In the [examples folder](https://github.com/keras-team/keras/tree/master/examples) of the repository, you will find more advanced models: question-answering with memory networks, text generation with stacked LSTMs, etc. ------------------ ## Installation Before installing Keras, please install one of its backend engines: TensorFlow, Theano, or CNTK. We recommend the TensorFlow backend. - [TensorFlow installation instructions](https://www.tensorflow.org/install/). - [Theano installation instructions](http://deeplearning.net/software/theano/install.html#install). - [CNTK installation instructions](https://docs.microsoft.com/en-us/cognitive-toolkit/setup-cntk-on-your-machine). You may also consider installing the following **optional dependencies**: - [cuDNN](https://docs.nvidia.com/deeplearning/sdk/cudnn-install/) (recommended if you plan on running Keras on GPU). - HDF5 and [h5py](http://docs.h5py.org/en/latest/build.html) (required if you plan on saving Keras models to disk). - [graphviz](https://graphviz.gitlab.io/download/) and [pydot](https://github.com/erocarrera/pydot) (used by [visualization utilities](https://keras.io/visualization/) to plot model graphs). Then, you can install Keras itself. 
There are two ways to install Keras: - **Install Keras from PyPI (recommended):** ```sh sudo pip install keras ``` If you are using a virtualenv, you may want to avoid using sudo: ```sh pip install keras ``` - **Alternatively: install Keras from the GitHub source:** First, clone Keras using `git`: ```sh git clone https://github.com/keras-team/keras.git ``` Then, `cd` to the Keras folder and run the install command: ```sh cd keras sudo python setup.py install ``` ------------------ ## Configuring your Keras backend By default, Keras will use TensorFlow as its tensor manipulation library. [Follow these instructions](https://keras.io/backend/) to configure the Keras backend. ------------------ ## Support You can ask questions and join the development discussion: - On the [Keras Google group](https://groups.google.com/forum/#!forum/keras-users). - On the [Keras Slack channel](https://kerasteam.slack.com). Use [this link](https://keras-slack-autojoin.herokuapp.com/) to request an invitation to the channel. You can also post **bug reports and feature requests** (only) in [GitHub issues](https://github.com/keras-team/keras/issues). Make sure to read [our guidelines](https://github.com/keras-team/keras/blob/master/CONTRIBUTING.md) first. ------------------ ## Why this name, Keras? Keras (κέρας) means _horn_ in Greek. It is a reference to a literary image from ancient Greek and Latin literature, first found in the _Odyssey_, where dream spirits (_Oneiroi_, singular _Oneiros_) are divided between those who deceive men with false visions, who arrive to Earth through a gate of ivory, and those who announce a future that will come to pass, who arrive through a gate of horn. It's a play on the words κέρας (horn) / κραίνω (fulfill), and ἐλέφας (ivory) / ἐλεφαίρομαι (deceive). Keras was initially developed as part of the research effort of project ONEIROS (Open-ended Neuro-Electronic Intelligent Robot Operating System). 
>_"Oneiroi are beyond our unravelling --who can be sure what tale they tell? Not all that men look for comes to pass. Two gates there are that give passage to fleeting Oneiroi; one is made of horn, one of ivory. The Oneiroi that pass through sawn ivory are deceitful, bearing a message that will not be fulfilled; those that come out through polished horn have truth behind them, to be accomplished for men who see them."_ Homer, Odyssey 19. 562 ff (Shewring translation). ------------------ Keras-2.2.4/setup.py0000644000000000116100000000442113355226611014040 0ustar rooteng00000000000000from setuptools import setup from setuptools import find_packages long_description = ''' Keras is a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. Use Keras if you need a deep learning library that: - Allows for easy and fast prototyping (through user friendliness, modularity, and extensibility). - Supports both convolutional networks and recurrent networks, as well as combinations of the two. - Runs seamlessly on CPU and GPU. Read the documentation at: https://keras.io/ For a detailed overview of what makes Keras special, see: https://keras.io/why-use-keras/ Keras is compatible with Python 2.7-3.6 and is distributed under the MIT license. 
'''

# Distribution metadata and dependency declarations for the Keras package.
# `long_description` is the module-level string defined just above this call.
setup(name='Keras',
      version='2.2.4',
      description='Deep Learning for humans',
      long_description=long_description,
      author='Francois Chollet',
      author_email='francois.chollet@gmail.com',
      url='https://github.com/keras-team/keras',
      download_url='https://github.com/keras-team/keras/tarball/2.2.4',
      license='MIT',
      # Hard requirements, installed together with the package.
      install_requires=['numpy>=1.9.1',
                        'scipy>=0.14',
                        'six>=1.9.0',
                        'pyyaml',
                        'h5py',
                        'keras_applications>=1.0.6',
                        'keras_preprocessing>=1.0.5'],
      # Optional extras, selected with e.g. `pip install -e .[tests]`.
      extras_require={
          'visualize': ['pydot>=1.2.4'],
          'tests': ['pytest',
                    'pytest-pep8',
                    'pytest-xdist',
                    'pytest-cov',
                    'pytest-timeout',
                    'pandas',
                    'requests'],
      },
      # Trove classifiers describing maturity, audience, license and
      # the Python versions declared as supported.
      classifiers=[
          'Development Status :: 5 - Production/Stable',
          'Intended Audience :: Developers',
          'Intended Audience :: Education',
          'Intended Audience :: Science/Research',
          'License :: OSI Approved :: MIT License',
          'Programming Language :: Python :: 2',
          'Programming Language :: Python :: 2.7',
          'Programming Language :: Python :: 3',
          'Programming Language :: Python :: 3.6',
          'Topic :: Software Development :: Libraries',
          'Topic :: Software Development :: Libraries :: Python Modules'
      ],
      # Ship every package discovered under the project root.
      packages=find_packages())
Keras-2.2.4/CONTRIBUTING.md0000644000000000116100000001643013227311000014543 0ustar rooteng00000000000000# On Github Issues and Pull Requests Found a bug? Have a new feature to suggest? Want to contribute changes to the codebase? Make sure to read this first. ## Bug reporting Your code doesn't work, and you have determined that the issue lies with Keras? Follow these steps to report a bug. 1. Your bug may already be fixed. Make sure to update to the current Keras master branch, as well as the latest Theano/TensorFlow/CNTK master branch. To easily update Theano: `pip install git+git://github.com/Theano/Theano.git --upgrade` 2. Search for similar issues. Make sure to delete `is:open` on the issue search to find solved tickets as well. It's possible somebody has encountered this bug already. Also remember to check out Keras' [FAQ](http://keras.io/faq/).
Still having a problem? Open an issue on Github to let us know. 3. Make sure you provide us with useful information about your configuration: what OS are you using? What Keras backend are you using? Are you running on GPU? If so, what is your version of Cuda, of cuDNN? What is your GPU? 4. Provide us with a script to reproduce the issue. This script should be runnable as-is and should not require external data download (use randomly generated data if you need to run a model on some test data). We recommend that you use Github Gists to post your code. Any issue that cannot be reproduced is likely to be closed. 5. If possible, take a stab at fixing the bug yourself --if you can! The more information you provide, the easier it is for us to validate that there is a bug and the faster we'll be able to take action. If you want your issue to be resolved quickly, following the steps above is crucial. --- ## Requesting a Feature You can also use Github issues to request features you would like to see in Keras, or changes in the Keras API. 1. Provide a clear and detailed explanation of the feature you want and why it's important to add. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on library for Keras. It is crucial for Keras to avoid bloating the API and codebase. 2. Provide code snippets demonstrating the API you have in mind and illustrating the use cases of your feature. Of course, you don't need to write any real code at this point! 3. After discussing the feature you may choose to attempt a Pull Request. If you're at all able, start writing some code. We always have more work to do than time to do it. If you can write some code then that will speed the process along. --- ## Requests for Contributions [This is the board](https://github.com/keras-team/keras/projects/1) where we list current outstanding issues and features to be added. 
If you want to start contributing to Keras, this is the place to start. --- ## Pull Requests **Where should I submit my pull request?** 1. **Keras improvements and bugfixes** go to the [Keras `master` branch](https://github.com/keras-team/keras/tree/master). 2. **Experimental new features** such as layers and datasets go to [keras-contrib](https://github.com/farizrahman4u/keras-contrib). Unless it is a new feature listed in [Requests for Contributions](https://github.com/keras-team/keras/projects/1), in which case it belongs in core Keras. If you think your feature belongs in core Keras, you can submit a design doc to explain your feature and argue for it (see explanations below). Please note that PRs that are primarily about **code style** (as opposed to fixing bugs, improving docs, or adding new functionality) will likely be rejected. Here's a quick guide to submitting your improvements: 1. If your PR introduces a change in functionality, make sure you start by writing a design doc and sending it to the Keras mailing list to discuss whether the change should be made, and how to handle it. This will save you from having your PR closed down the road! Of course, if your PR is a simple bug fix, you don't need to do that. The process for writing and submitting design docs is as follows: - Start from [this Google Doc template](https://docs.google.com/document/d/1ZXNfce77LDW9tFAj6U5ctaJmI5mT7CQXOFMEAZo-mAA/edit#), and copy it to a new Google Doc. - Fill in the content. Note that you will need to insert code examples. To insert code, use a Google Doc extension such as [CodePretty](https://chrome.google.com/webstore/detail/code-pretty/igjbncgfgnfpbnifnnlcmjfbnidkndnh?hl=en) (there are several such extensions available). - Set sharing settings to "everyone with the link is allowed to comment" - Send the document to `keras-users@googlegroups.com` with a subject that starts with `[API DESIGN REVIEW]` (all caps) so that we notice it.
- Wait for comments, and answer them as they come. Edit the proposal as necessary. - The proposal will finally be approved or rejected. Once approved, you can send out Pull Requests or ask others to write Pull Requests. 2. Write the code (or get others to write it). This is the hard part! 3. Make sure any new function or class you introduce has proper docstrings. Make sure any code you touch still has up-to-date docstrings and documentation. **Docstring style should be respected.** In particular, they should be formatted in MarkDown, and there should be sections for `Arguments`, `Returns`, `Raises` (if applicable). Look at other docstrings in the codebase for examples. 4. Write tests. Your code should have full unit test coverage. If you want to see your PR merged promptly, this is crucial. 5. Run our test suite locally. It's easy: from the Keras folder, simply run: `py.test tests/`. - You will need to install the test requirements as well: `pip install -e .[tests]`. 6. Make sure all tests are passing: - with the Theano backend, on Python 2.7 and Python 3.6. Make sure you have the development version of Theano. - with the TensorFlow backend, on Python 2.7 and Python 3.6. Make sure you have the development version of TensorFlow. - with the CNTK backend, on Python 2.7 and Python 3.6. Make sure you have the development version of CNTK. 7. We use PEP8 syntax conventions, but we aren't dogmatic when it comes to line length. Make sure your lines stay reasonably sized, though. To make your life easier, we recommend running a PEP8 linter: - Install PEP8 packages: `pip install pep8 pytest-pep8 autopep8` - Run a standalone PEP8 check: `py.test --pep8 -m pep8` - You can automatically fix some PEP8 errors by running: `autopep8 -i --select <errors> <FILENAME>`, for example: `autopep8 -i --select E128 tests/keras/backend/test_backends.py` 8. When committing, use appropriate, descriptive commit messages. 9. Update the documentation.
If introducing new functionality, make sure you include code snippets demonstrating the usage of your new feature. 10. Submit your PR. If your changes have been approved in a previous discussion, and if you have complete (and passing) unit tests as well as proper docstrings/documentation, your PR is likely to be merged promptly. --- ## Adding new examples Even if you don't contribute to the Keras source code, if you have an application of Keras that is concise and powerful, please consider adding it to our collection of examples. [Existing examples](https://github.com/keras-team/keras/tree/master/examples) show idiomatic Keras code: make sure to keep your own script in the same spirit. Keras-2.2.4/examples/0000755000000000116100000000000013355226624014147 5ustar rooteng00000000000000Keras-2.2.4/examples/cifar10_cnn_tfaugment2d.py0000644000000000116100000001575413247612467021124 0ustar rooteng00000000000000'''Train a simple deep CNN on the CIFAR10 small images dataset. Using Tensorflow internal augmentation APIs by replacing ImageGenerator with an embedded AugmentLayer using LambdaLayer, which is faster on GPU. 
# Benchmark of `ImageGenerator` vs `AugmentLayer` both using augmentation 2D: (backend = Tensorflow-GPU, Nvidia Tesla P100-SXM2) Settings: horizontal_flip = True ---------------------------------------------------------------------------- Epoch | ImageGenerator | ImageGenerator | AugmentLayer | Augment Layer Number | %Accuracy | Performance | %Accuracy | Performance ---------------------------------------------------------------------------- 1 | 44.84 | 15ms/step | 45.54 | 358us/step 2 | 52.34 | 8ms/step | 50.55 | 285us/step 8 | 65.45 | 8ms/step | 65.59 | 281us/step 25 | 76.74 | 8ms/step | 76.17 | 280us/step 100 | 78.81 | 8ms/step | 78.70 | 285us/step --------------------------------------------------------------------------- Settings: rotation = 30.0 ---------------------------------------------------------------------------- Epoch | ImageGenerator | ImageGenerator | AugmentLayer | Augment Layer Number | %Accuracy | Performance | %Accuracy | Performance ---------------------------------------------------------------------------- 1 | 43.46 | 15ms/step | 42.21 | 334us/step 2 | 48.95 | 11ms/step | 48.06 | 282us/step 8 | 63.59 | 11ms/step | 61.35 | 290us/step 25 | 72.25 | 12ms/step | 71.08 | 287us/step 100 | 76.35 | 11ms/step | 74.62 | 286us/step --------------------------------------------------------------------------- (Corner process and rotation precision by `ImageGenerator` and `AugmentLayer` are slightly different.) 
'''

from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, Lambda, MaxPooling2D
from keras import backend as K
import os

# The augmentation below is written directly against TensorFlow ops,
# so refuse to run on any other backend before importing tensorflow.
if K.backend() != 'tensorflow':
    raise RuntimeError('This example can only run with the '
                       'TensorFlow backend, '
                       'because it requires TF-native augmentation APIs')

import tensorflow as tf


def augment_2d(inputs, rotation=0, horizontal_flip=False, vertical_flip=False):
    """Apply random rotation and/or flip augmentation on a batch of 2D data.

    Each enabled augmentation is drawn independently per sample and all of
    them are composed into a single projective transform per sample.

    # Arguments
      inputs: A tensor whose first three dimensions are read as
          (batch, height, width). It is converted to float32 if it has
          another dtype.
      rotation: A float, the degree range for rotation (0 <= rotation < 180),
          e.g. 3 for random image rotation between (-3.0, 3.0).
          Rotation is skipped unless `rotation > 0`.
      horizontal_flip: A boolean, whether to allow random horizontal flip,
          e.g. true for 50% possibility to flip image horizontally.
      vertical_flip: A boolean, whether to allow random vertical flip,
          e.g. true for 50% possibility to flip image vertically.

    # Returns
      input data after augmentation, whose shape is the same as its original.
      When no augmentation is enabled the (float32) inputs are returned
      untransformed.
    """
    if inputs.dtype != tf.float32:
        inputs = tf.image.convert_image_dtype(inputs, dtype=tf.float32)

    with tf.name_scope('augmentation'):
        # Dynamic shape, so the batch size may be unknown at graph-build time.
        shp = tf.shape(inputs)
        batch_size, height, width = shp[0], shp[1], shp[2]
        width = tf.cast(width, tf.float32)
        height = tf.cast(height, tf.float32)

        # One entry per enabled augmentation; each entry holds one
        # 8-parameter transform row per sample.
        transforms = []
        # 8-parameter row that leaves the image unchanged; used for the
        # samples whose coin flip says "do not flip".
        identity = tf.constant([1, 0, 0, 0, 1, 0, 0, 0], dtype=tf.float32)

        if rotation > 0:
            # Degrees -> radians, then one uniform angle per sample.
            angle_rad = rotation * 3.141592653589793 / 180.0
            angles = tf.random_uniform([batch_size], -angle_rad, angle_rad)
            f = tf.contrib.image.angles_to_projective_transforms(angles,
                                                                 height, width)
            transforms.append(f)

        if horizontal_flip:
            # Per-sample coin: True with probability 0.5.
            coin = tf.less(tf.random_uniform([batch_size], 0, 1.0), 0.5)
            # x -> width - x, y unchanged.
            shape = [-1., 0., width,
                     0., 1., 0.,
                     0., 0.]
            flip_transform = tf.convert_to_tensor(shape, dtype=tf.float32)
            flip = tf.tile(tf.expand_dims(flip_transform, 0), [batch_size, 1])
            noflip = tf.tile(tf.expand_dims(identity, 0), [batch_size, 1])
            transforms.append(tf.where(coin, flip, noflip))

        if vertical_flip:
            # Per-sample coin: True with probability 0.5.
            coin = tf.less(tf.random_uniform([batch_size], 0, 1.0), 0.5)
            # x unchanged, y -> height - y.
            shape = [1., 0., 0.,
                     0., -1., height,
                     0., 0.]
            flip_transform = tf.convert_to_tensor(shape, dtype=tf.float32)
            flip = tf.tile(tf.expand_dims(flip_transform, 0), [batch_size, 1])
            noflip = tf.tile(tf.expand_dims(identity, 0), [batch_size, 1])
            transforms.append(tf.where(coin, flip, noflip))

    # Only touch the pixels when at least one augmentation was requested.
    if transforms:
        f = tf.contrib.image.compose_transforms(*transforms)
        inputs = tf.contrib.image.transform(inputs, f, interpolation='BILINEAR')
    return inputs


batch_size = 32
num_classes = 10
epochs = 100
num_predictions = 20
save_dir = '/tmp/saved_models'
model_name = 'keras_cifar10_trained_model.h5'

# The data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
# Augmentation runs inside the graph as the first layer of the model.
# NOTE(review): augment_2d has no training-phase switch, so it presumably
# also perturbs the validation/evaluation inputs below -- confirm intended.
model.add(Lambda(augment_2d,
                 input_shape=x_train.shape[1:],
                 arguments={'rotation': 8.0, 'horizontal_flip': True}))
# Two conv/conv/pool/dropout stages (32 then 64 filters).
model.add(Conv2D(32, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Classifier head.
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Scale pixel values by 1/255 as float32 before feeding the model.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),
          shuffle=True)

# Save model and weights
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
Keras-2.2.4/examples/mnist_mlp.py0000644000000000116100000000313713247612467016533 0ustar rooteng00000000000000'''Trains a simple deep NN on the MNIST dataset. Gets to 98.40% test accuracy after 20 epochs (there is *a lot* of margin for parameter tuning). 2 seconds per epoch on a K520 GPU.
''' from __future__ import print_function import keras from keras.datasets import mnist from keras.models import Sequential from keras.layers import Dense, Dropout from keras.optimizers import RMSprop batch_size = 128 num_classes = 10 epochs = 20 # the data, split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = x_train.reshape(60000, 784) x_test = x_test.reshape(10000, 784) x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) model = Sequential() model.add(Dense(512, activation='relu', input_shape=(784,))) model.add(Dropout(0.2)) model.add(Dense(512, activation='relu')) model.add(Dropout(0.2)) model.add(Dense(num_classes, activation='softmax')) model.summary() model.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy']) history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) score = model.evaluate(x_test, y_test, verbose=0) print('Test loss:', score[0]) print('Test accuracy:', score[1]) Keras-2.2.4/examples/mnist_irnn.py0000644000000000116100000000435213247612467016711 0ustar rooteng00000000000000'''This is a reproduction of the IRNN experiment with pixel-by-pixel sequential MNIST in "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" by Quoc V. Le, Navdeep Jaitly, Geoffrey E. Hinton arxiv:1504.00941v2 [cs.NE] 7 Apr 2015 http://arxiv.org/pdf/1504.00941v2.pdf Optimizer is replaced with RMSprop which yields more stable and steady improvement. Reaches 0.93 train/test accuracy after 900 epochs (which roughly corresponds to 1687500 steps in the original paper.) 
''' from __future__ import print_function import keras from keras.datasets import mnist from keras.models import Sequential from keras.layers import Dense, Activation from keras.layers import SimpleRNN from keras import initializers from keras.optimizers import RMSprop batch_size = 32 num_classes = 10 epochs = 200 hidden_units = 100 learning_rate = 1e-6 clip_norm = 1.0 # the data, split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = x_train.reshape(x_train.shape[0], -1, 1) x_test = x_test.reshape(x_test.shape[0], -1, 1) x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) print('Evaluate IRNN...') model = Sequential() model.add(SimpleRNN(hidden_units, kernel_initializer=initializers.RandomNormal(stddev=0.001), recurrent_initializer=initializers.Identity(gain=1.0), activation='relu', input_shape=x_train.shape[1:])) model.add(Dense(num_classes)) model.add(Activation('softmax')) rmsprop = RMSprop(lr=learning_rate) model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy']) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) scores = model.evaluate(x_test, y_test, verbose=0) print('IRNN test score:', scores[0]) print('IRNN test accuracy:', scores[1]) Keras-2.2.4/examples/mnist_cnn.py0000644000000000116100000000432113247612467016515 0ustar rooteng00000000000000'''Trains a simple convnet on the MNIST dataset. Gets to 99.25% test accuracy after 12 epochs (there is still a lot of margin for parameter tuning). 16 seconds per epoch on a GRID K520 GPU. 
''' from __future__ import print_function import keras from keras.datasets import mnist from keras.models import Sequential from keras.layers import Dense, Dropout, Flatten from keras.layers import Conv2D, MaxPooling2D from keras import backend as K batch_size = 128 num_classes = 10 epochs = 12 # input image dimensions img_rows, img_cols = 28, 28 # the data, split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() if K.image_data_format() == 'channels_first': x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) input_shape = (1, img_rows, img_cols) else: x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) input_shape = (img_rows, img_cols, 1) x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) model = Sequential() model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape)) model.add(Conv2D(64, (3, 3), activation='relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(128, activation='relu')) model.add(Dropout(0.5)) model.add(Dense(num_classes, activation='softmax')) model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy']) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) score = model.evaluate(x_test, y_test, verbose=0) print('Test loss:', score[0]) print('Test accuracy:', score[1]) 
Keras-2.2.4/examples/lstm_text_generation.py0000644000000000116100000000665213342055016020757 0ustar rooteng00000000000000'''Example script to generate text from Nietzsche's writings. At least 20 epochs are required before the generated text starts sounding coherent. It is recommended to run this script on GPU, as recurrent networks are quite computationally intensive. If you try this script on new data, make sure your corpus has at least ~100k characters. ~1M is better. ''' from __future__ import print_function from keras.callbacks import LambdaCallback from keras.models import Sequential from keras.layers import Dense from keras.layers import LSTM from keras.optimizers import RMSprop from keras.utils.data_utils import get_file import numpy as np import random import sys import io path = get_file( 'nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt') with io.open(path, encoding='utf-8') as f: text = f.read().lower() print('corpus length:', len(text)) chars = sorted(list(set(text))) print('total chars:', len(chars)) char_indices = dict((c, i) for i, c in enumerate(chars)) indices_char = dict((i, c) for i, c in enumerate(chars)) # cut the text in semi-redundant sequences of maxlen characters maxlen = 40 step = 3 sentences = [] next_chars = [] for i in range(0, len(text) - maxlen, step): sentences.append(text[i: i + maxlen]) next_chars.append(text[i + maxlen]) print('nb sequences:', len(sentences)) print('Vectorization...') x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) y = np.zeros((len(sentences), len(chars)), dtype=np.bool) for i, sentence in enumerate(sentences): for t, char in enumerate(sentence): x[i, t, char_indices[char]] = 1 y[i, char_indices[next_chars[i]]] = 1 # build the model: a single LSTM print('Build model...') model = Sequential() model.add(LSTM(128, input_shape=(maxlen, len(chars)))) model.add(Dense(len(chars), activation='softmax')) optimizer = RMSprop(lr=0.01) 
def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reweighted by temperature.

    The probabilities are turned into logits, divided by `temperature`,
    and re-normalized with a softmax before a single multinomial draw.

    # Arguments
        preds: 1D array-like of probabilities.
        temperature: float. Values below 1 sharpen the distribution,
            values above 1 flatten it. `0` selects the most likely
            index deterministically (greedy decoding).

    # Returns
        Index of the sampled entry.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit of the softmax as temperature -> 0; avoids dividing by zero.
        return np.argmax(preds)
    # Zero probabilities legitimately map to -inf logits; silence the warning.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Softmax is shift-invariant: subtracting the max keeps the largest
    # exponent at 0 so that low temperatures cannot underflow every term
    # to zero (which would yield 0 / 0 = NaN probabilities).
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, _):
    """Print text generated by the model at the end of an epoch.

    Picks a random seed window of `maxlen` characters from `text` and,
    for several diversity (temperature) values, generates 400 characters
    one at a time, writing them to stdout as they are produced.

    # Arguments
        epoch: index of the epoch that just finished.
        _: unused logs argument supplied by `LambdaCallback`.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            # One-hot encode the current window of characters.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            # Slide the window forward by one character.
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
def sampling(args):
    """Reparameterization trick by sampling from an isotropic unit Gaussian.

    # Arguments:
        args (tensor): mean and log of variance of Q(z|X)

    # Returns:
        z (tensor): sampled latent vector
    """
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


def _tick_positions(n, digit_size):
    """Return the pixel coordinate of the centre of each of `n` tiles."""
    start = digit_size // 2
    return np.arange(start, start + n * digit_size, digit_size)


def plot_results(models,
                 data,
                 batch_size=128,
                 model_name="vae_mnist"):
    """Plots labels and MNIST digits as function of 2-dim latent vector

    Two figures are saved under the directory `model_name` and shown:
    a scatter plot of the encoded test set, and a 30x30 grid of digits
    decoded from evenly spaced latent coordinates.

    # Arguments:
        models (tuple): encoder and decoder models
        data (tuple): test data and label
        batch_size (int): prediction batch size
        model_name (string): which model is using this function
    """

    encoder, decoder = models
    x_test, y_test = data
    os.makedirs(model_name, exist_ok=True)

    filename = os.path.join(model_name, "vae_mean.png")
    # display a 2D plot of the digit classes in the latent space
    z_mean, _, _ = encoder.predict(x_test,
                                   batch_size=batch_size)
    plt.figure(figsize=(12, 10))
    plt.scatter(z_mean[:, 0], z_mean[:, 1], c=y_test)
    plt.colorbar()
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.savefig(filename)
    plt.show()

    filename = os.path.join(model_name, "digits_over_latent.png")
    # display a 30x30 2D manifold of digits
    n = 30
    digit_size = 28
    figure = np.zeros((digit_size * n, digit_size * n))
    # linearly spaced coordinates corresponding to the 2D plot
    # of digit classes in the latent space
    grid_x = np.linspace(-4, 4, n)
    grid_y = np.linspace(-4, 4, n)[::-1]

    for i, yi in enumerate(grid_y):
        for j, xi in enumerate(grid_x):
            z_sample = np.array([[xi, yi]])
            x_decoded = decoder.predict(z_sample)
            digit = x_decoded[0].reshape(digit_size, digit_size)
            figure[i * digit_size: (i + 1) * digit_size,
                   j * digit_size: (j + 1) * digit_size] = digit

    plt.figure(figsize=(10, 10))
    # One tick per tile: the number of positions must equal the number
    # of labels (n), otherwise matplotlib rejects the tick/label pair.
    pixel_range = _tick_positions(n, digit_size)
    sample_range_x = np.round(grid_x, 1)
    sample_range_y = np.round(grid_y, 1)
    plt.xticks(pixel_range, sample_range_x)
    plt.yticks(pixel_range, sample_range_y)
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.imshow(figure, cmap='Greys_r')
    plt.savefig(filename)
    plt.show()
output_shape=(latent_dim,), name='z')([z_mean, z_log_var]) # instantiate encoder model encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder') encoder.summary() plot_model(encoder, to_file='vae_cnn_encoder.png', show_shapes=True) # build decoder model latent_inputs = Input(shape=(latent_dim,), name='z_sampling') x = Dense(shape[1] * shape[2] * shape[3], activation='relu')(latent_inputs) x = Reshape((shape[1], shape[2], shape[3]))(x) for i in range(2): x = Conv2DTranspose(filters=filters, kernel_size=kernel_size, activation='relu', strides=2, padding='same')(x) filters //= 2 outputs = Conv2DTranspose(filters=1, kernel_size=kernel_size, activation='sigmoid', padding='same', name='decoder_output')(x) # instantiate decoder model decoder = Model(latent_inputs, outputs, name='decoder') decoder.summary() plot_model(decoder, to_file='vae_cnn_decoder.png', show_shapes=True) # instantiate VAE model outputs = decoder(encoder(inputs)[2]) vae = Model(inputs, outputs, name='vae') if __name__ == '__main__': parser = argparse.ArgumentParser() help_ = "Load h5 model trained weights" parser.add_argument("-w", "--weights", help=help_) help_ = "Use mse loss instead of binary cross entropy (default)" parser.add_argument("-m", "--mse", help=help_, action='store_true') args = parser.parse_args() models = (encoder, decoder) data = (x_test, y_test) # VAE loss = mse_loss or xent_loss + kl_loss if args.mse: reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs)) else: reconstruction_loss = binary_crossentropy(K.flatten(inputs), K.flatten(outputs)) reconstruction_loss *= image_size * image_size kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) kl_loss = K.sum(kl_loss, axis=-1) kl_loss *= -0.5 vae_loss = K.mean(reconstruction_loss + kl_loss) vae.add_loss(vae_loss) vae.compile(optimizer='rmsprop') vae.summary() plot_model(vae, to_file='vae_cnn.png', show_shapes=True) if args.weights: vae.load_weights(args.weights) else: # train the autoencoder vae.fit(x_train, 
epochs=epochs, batch_size=batch_size, validation_data=(x_test, None)) vae.save_weights('vae_cnn_mnist.h5') plot_results(models, data, batch_size=batch_size, model_name="vae_cnn") Keras-2.2.4/examples/lstm_seq2seq_restore.py0000644000000000116100000001321313240665765020714 0ustar rooteng00000000000000'''Restore a character-level sequence to sequence model from disk and use it to generate predictions. This script loads the s2s.h5 model saved by lstm_seq2seq.py and generates sequences from it. It assumes that no changes have been made (for example: latent_dim is unchanged, and the input data and model architecture are unchanged). See lstm_seq2seq.py for more details on the model architecture and how it is trained. ''' from __future__ import print_function from keras.models import Model, load_model from keras.layers import Input import numpy as np batch_size = 64 # Batch size for training. epochs = 100 # Number of epochs to train for. latent_dim = 256 # Latent dimensionality of the encoding space. num_samples = 10000 # Number of samples to train on. # Path to the data txt file on disk. data_path = 'fra-eng/fra.txt' # Vectorize the data. We use the same approach as the training script. # NOTE: the data must be identical, in order for the character -> integer # mappings to be consistent. # We omit encoding target_texts since they are not needed. input_texts = [] target_texts = [] input_characters = set() target_characters = set() with open(data_path, 'r', encoding='utf-8') as f: lines = f.read().split('\n') for line in lines[: min(num_samples, len(lines) - 1)]: input_text, target_text = line.split('\t') # We use "tab" as the "start sequence" character # for the targets, and "\n" as "end sequence" character. 
target_text = '\t' + target_text + '\n' input_texts.append(input_text) target_texts.append(target_text) for char in input_text: if char not in input_characters: input_characters.add(char) for char in target_text: if char not in target_characters: target_characters.add(char) input_characters = sorted(list(input_characters)) target_characters = sorted(list(target_characters)) num_encoder_tokens = len(input_characters) num_decoder_tokens = len(target_characters) max_encoder_seq_length = max([len(txt) for txt in input_texts]) max_decoder_seq_length = max([len(txt) for txt in target_texts]) print('Number of samples:', len(input_texts)) print('Number of unique input tokens:', num_encoder_tokens) print('Number of unique output tokens:', num_decoder_tokens) print('Max sequence length for inputs:', max_encoder_seq_length) print('Max sequence length for outputs:', max_decoder_seq_length) input_token_index = dict( [(char, i) for i, char in enumerate(input_characters)]) target_token_index = dict( [(char, i) for i, char in enumerate(target_characters)]) encoder_input_data = np.zeros( (len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32') for i, input_text in enumerate(input_texts): for t, char in enumerate(input_text): encoder_input_data[i, t, input_token_index[char]] = 1. # Restore the model and construct the encoder and decoder. 
def decode_sequence(input_seq):
    """Greedily decode one input sequence into a target string.

    The encoder turns `input_seq` into its state vectors; the decoder is
    then run one character at a time, starting from the tab start
    character, always taking the most likely next character. Decoding
    stops at the newline end character or once the output grows past
    `max_decoder_seq_length`. A batch of size 1 is assumed.
    """

    def one_hot(token_index):
        # Length-1 target sequence holding a single one-hot character.
        step = np.zeros((1, 1, num_decoder_tokens))
        step[0, 0, token_index] = 1.
        return step

    # Encode the input as state vectors.
    states = encoder_model.predict(input_seq)
    # The first decoder input is the start-of-sequence character.
    current = one_hot(target_token_index['\t'])

    pieces = []
    produced = 0
    while True:
        output_tokens, h, c = decoder_model.predict([current] + states)

        # Greedy choice of the next character.
        best = np.argmax(output_tokens[0, -1, :])
        char = reverse_target_char_index[best]
        pieces.append(char)
        produced += len(char)

        # Stop on the end character or when the output is too long.
        if char == '\n' or produced > max_decoder_seq_length:
            break

        # Feed the chosen character and the new states back in.
        current = one_hot(best)
        states = [h, c]

    return ''.join(pieces)
class EvaluateInputTensor(Callback):
    """ Validate a model which does not expect external numpy data during training.

    Keras does not expect external numpy data at training time, and thus cannot
    accept numpy arrays for validation when all of a Keras Model's
    `Input(input_tensor)` layers are provided an `input_tensor` parameter,
    and the call to `Model.compile(target_tensors)` defines all `target_tensors`.
    Instead, create a second model for validation which is also configured
    with input tensors and add it to the `EvaluateInputTensor` callback
    to perform validation.

    It is recommended that this callback be the first in the list of callbacks
    because it defines the validation variables required by many other callbacks,
    and Callbacks are made in order.

    # Arguments
        model: Keras model on which to call model.evaluate().
        steps: Integer or `None`.
            Total number of steps (batches of samples)
            before declaring the evaluation round finished.
            `None` is forwarded to `evaluate` unchanged.
        metrics_prefix: String prepended (with an underscore) to each
            metric name when it is written into `logs`.
        verbose: Verbosity passed to `evaluate`; values above 0 also
            print the collected metrics after each epoch.
    """

    def __init__(self, model, steps, metrics_prefix='val', verbose=1):
        # parameter of callbacks passed during initialization
        # pass evaluation mode directly
        super(EvaluateInputTensor, self).__init__()
        self.val_model = model
        self.num_steps = steps
        self.verbose = verbose
        self.metrics_prefix = metrics_prefix

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate the validation model and record its metrics in `logs`.

        The training model's weights are copied into the validation model
        first, so both models describe the same network state.
        """
        # A `{}` default would be shared (and mutated) across calls.
        if logs is None:
            logs = {}
        self.val_model.set_weights(self.model.get_weights())
        # `steps` is documented as optional: do not call int() on None.
        steps = None if self.num_steps is None else int(self.num_steps)
        results = self.val_model.evaluate(None, None, steps=steps,
                                          verbose=self.verbose)
        # `evaluate` returns a bare scalar when the model has no metrics.
        if not isinstance(results, (list, tuple)):
            results = [results]
        reported = []
        for result, name in zip(results, self.val_model.metrics_names):
            metric_name = self.metrics_prefix + '_' + name
            logs[metric_name] = result
            reported.append(metric_name + ': ' + str(result) + ' ')
        if self.verbose > 0:
            print('\n' + ''.join(reported))


def cnn_layers(x_train_input):
    """Build the convnet on top of `x_train_input` and return its output.

    Two convolution / max-pooling stages are followed by a 512-unit
    dense layer with dropout and a softmax layer of `num_classes` units.
    """
    x = layers.Conv2D(32, (3, 3),
                      activation='relu', padding='valid')(x_train_input)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = layers.Conv2D(64, (3, 3), activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    x_train_out = layers.Dense(num_classes,
                               activation='softmax',
                               name='x_train_out')(x)
    return x_train_out
# # If `enqueue_many` is `True`, `tensors` is assumed to represent a # batch of examples, where the first dimension is indexed by example, # and all members of `tensors` should have the same size in the # first dimension. If an input tensor has shape `[*, x, y, z]`, the # output will have shape `[batch_size, x, y, z]`. enqueue_many = True cache_dir = os.path.expanduser( os.path.join('~', '.keras', 'datasets', 'MNIST-data')) data = mnist.read_data_sets(cache_dir, validation_size=0) x_train_batch, y_train_batch = tf.train.shuffle_batch( tensors=[data.train.images, data.train.labels.astype(np.int32)], batch_size=batch_size, capacity=capacity, min_after_dequeue=min_after_dequeue, enqueue_many=enqueue_many, num_threads=8) x_train_batch = tf.cast(x_train_batch, tf.float32) x_train_batch = tf.reshape(x_train_batch, shape=batch_shape) y_train_batch = tf.cast(y_train_batch, tf.int32) y_train_batch = tf.one_hot(y_train_batch, num_classes) x_batch_shape = x_train_batch.get_shape().as_list() y_batch_shape = y_train_batch.get_shape().as_list() model_input = layers.Input(tensor=x_train_batch) model_output = cnn_layers(model_input) train_model = keras.models.Model(inputs=model_input, outputs=model_output) # Pass the target tensor `y_train_batch` to `compile` # via the `target_tensors` keyword argument: train_model.compile(optimizer=keras.optimizers.RMSprop(lr=2e-3, decay=1e-5), loss='categorical_crossentropy', metrics=['accuracy'], target_tensors=[y_train_batch]) train_model.summary() x_test_batch, y_test_batch = tf.train.batch( tensors=[data.test.images, data.test.labels.astype(np.int32)], batch_size=batch_size, capacity=capacity, enqueue_many=enqueue_many, num_threads=8) # Create a separate test model # to perform validation during training x_test_batch = tf.cast(x_test_batch, tf.float32) x_test_batch = tf.reshape(x_test_batch, shape=batch_shape) y_test_batch = tf.cast(y_test_batch, tf.int32) y_test_batch = tf.one_hot(y_test_batch, num_classes) x_test_batch_shape = 
x_test_batch.get_shape().as_list() y_test_batch_shape = y_test_batch.get_shape().as_list() test_model_input = layers.Input(tensor=x_test_batch) test_model_output = cnn_layers(test_model_input) test_model = keras.models.Model(inputs=test_model_input, outputs=test_model_output) # Pass the target tensor `y_test_batch` to `compile` # via the `target_tensors` keyword argument: test_model.compile(optimizer=keras.optimizers.RMSprop(lr=2e-3, decay=1e-5), loss='categorical_crossentropy', metrics=['accuracy'], target_tensors=[y_test_batch]) # Fit the model using data from the TFRecord data tensors. coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess, coord) train_model.fit( epochs=epochs, steps_per_epoch=int(np.ceil(data.train.num_examples / float(batch_size))), callbacks=[EvaluateInputTensor(test_model, steps=100)]) # Save the model weights. train_model.save_weights('saved_wt.h5') # Clean up the TF session. coord.request_stop() coord.join(threads) K.clear_session() # Second Session to test loading trained model without tensors x_test = np.reshape(data.test.images, (data.test.images.shape[0], 28, 28, 1)) y_test = data.test.labels x_test_inp = layers.Input(shape=(x_test.shape[1:])) test_out = cnn_layers(x_test_inp) test_model = keras.models.Model(inputs=x_test_inp, outputs=test_out) test_model.load_weights('saved_wt.h5') test_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) test_model.summary() loss, acc = test_model.evaluate(x_test, keras.utils.to_categorical(y_test), batch_size=batch_size) print('\nTest accuracy: {0}'.format(acc)) Keras-2.2.4/examples/imdb_fasttext.py0000644000000000116100000001152313342055016017347 0ustar rooteng00000000000000'''This example demonstrates the use of fasttext for text classification Based on Joulin et al's paper: Bags of Tricks for Efficient Text Classification https://arxiv.org/abs/1607.01759 Results on IMDB datasets with uni and bi-gram embeddings: Uni-gram: 0.8813 test 
def create_ngram_set(input_list, ngram_value=2):
    """
    Extract a set of n-grams from a list of integers.

    >>> create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=2) == {
    ...     (4, 9), (4, 1), (1, 4), (9, 4)}
    True

    >>> create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=3) == {
    ...     (1, 4, 9), (4, 9, 4), (9, 4, 1), (4, 1, 4)}
    True
    """
    # zip over the list shifted by 0..ngram_value-1 yields every window.
    return set(zip(*[input_list[i:] for i in range(ngram_value)]))


def add_ngram(sequences, token_indice, ngram_range=2):
    """
    Augment the input list of list (sequences) by appending n-grams values.

    The input sequences are not modified; new lists are returned.

    Example: adding bi-gram
    >>> sequences = [[1, 3, 4, 5], [1, 3, 7, 9, 2]]
    >>> token_indice = {(1, 3): 1337, (9, 2): 42, (4, 5): 2017}
    >>> add_ngram(sequences, token_indice, ngram_range=2)
    [[1, 3, 4, 5, 1337, 2017], [1, 3, 7, 9, 2, 1337, 42]]

    Example: adding tri-gram
    >>> sequences = [[1, 3, 4, 5], [1, 3, 7, 9, 2]]
    >>> token_indice = {(1, 3): 1337, (9, 2): 42, (4, 5): 2017, (7, 9, 2): 2018}
    >>> add_ngram(sequences, token_indice, ngram_range=3)
    [[1, 3, 4, 5, 1337, 2017], [1, 3, 7, 9, 2, 1337, 42, 2018]]
    """
    new_sequences = []
    for input_list in sequences:
        tokens = list(input_list)
        new_list = list(tokens)
        for ngram_value in range(2, ngram_range + 1):
            # Slide over the original tokens only: the ids appended for
            # shorter n-grams are not part of the text and must not be
            # combined into longer n-grams.
            for i in range(len(tokens) - ngram_value + 1):
                ngram = tuple(tokens[i:i + ngram_value])
                if ngram in token_indice:
                    new_list.append(token_indice[ngram])
        new_sequences.append(new_list)

    return new_sequences
print(len(x_train), 'train sequences') print(len(x_test), 'test sequences') print('Average train sequence length: {}'.format( np.mean(list(map(len, x_train)), dtype=int))) print('Average test sequence length: {}'.format( np.mean(list(map(len, x_test)), dtype=int))) if ngram_range > 1: print('Adding {}-gram features'.format(ngram_range)) # Create set of unique n-gram from the training set. ngram_set = set() for input_list in x_train: for i in range(2, ngram_range + 1): set_of_ngram = create_ngram_set(input_list, ngram_value=i) ngram_set.update(set_of_ngram) # Dictionary mapping n-gram token to a unique integer. # Integer values are greater than max_features in order # to avoid collision with existing features. start_index = max_features + 1 token_indice = {v: k + start_index for k, v in enumerate(ngram_set)} indice_token = {token_indice[k]: k for k in token_indice} # max_features is the highest integer that could be found in the dataset. max_features = np.max(list(indice_token.keys())) + 1 # Augmenting x_train and x_test with n-grams features x_train = add_ngram(x_train, token_indice, ngram_range) x_test = add_ngram(x_test, token_indice, ngram_range) print('Average train sequence length: {}'.format( np.mean(list(map(len, x_train)), dtype=int))) print('Average test sequence length: {}'.format( np.mean(list(map(len, x_test)), dtype=int))) print('Pad sequences (samples x time)') x_train = sequence.pad_sequences(x_train, maxlen=maxlen) x_test = sequence.pad_sequences(x_test, maxlen=maxlen) print('x_train shape:', x_train.shape) print('x_test shape:', x_test.shape) print('Build model...') model = Sequential() # we start off with an efficient embedding layer which maps # our vocab indices into embedding_dims dimensions model.add(Embedding(max_features, embedding_dims, input_length=maxlen)) # we add a GlobalAveragePooling1D, which will average the embeddings # of all words in the document model.add(GlobalAveragePooling1D()) # We project onto a single unit output layer, and 
squash it with a sigmoid: model.add(Dense(1, activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test)) Keras-2.2.4/examples/imdb_cnn.py0000644000000000116100000000411013202372135016254 0ustar rooteng00000000000000'''This example demonstrates the use of Convolution1D for text classification. Gets to 0.89 test accuracy after 2 epochs. 90s/epoch on Intel i5 2.4Ghz CPU. 10s/epoch on Tesla K40 GPU. ''' from __future__ import print_function from keras.preprocessing import sequence from keras.models import Sequential from keras.layers import Dense, Dropout, Activation from keras.layers import Embedding from keras.layers import Conv1D, GlobalMaxPooling1D from keras.datasets import imdb # set parameters: max_features = 5000 maxlen = 400 batch_size = 32 embedding_dims = 50 filters = 250 kernel_size = 3 hidden_dims = 250 epochs = 2 print('Loading data...') (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) print(len(x_train), 'train sequences') print(len(x_test), 'test sequences') print('Pad sequences (samples x time)') x_train = sequence.pad_sequences(x_train, maxlen=maxlen) x_test = sequence.pad_sequences(x_test, maxlen=maxlen) print('x_train shape:', x_train.shape) print('x_test shape:', x_test.shape) print('Build model...') model = Sequential() # we start off with an efficient embedding layer which maps # our vocab indices into embedding_dims dimensions model.add(Embedding(max_features, embedding_dims, input_length=maxlen)) model.add(Dropout(0.2)) # we add a Convolution1D, which will learn filters # word group filters of size filter_length: model.add(Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1)) # we use max pooling: model.add(GlobalMaxPooling1D()) # We add a vanilla hidden layer: model.add(Dense(hidden_dims)) model.add(Dropout(0.2)) model.add(Activation('relu')) # We 
project onto a single unit output layer, and squash it with a sigmoid: model.add(Dense(1)) model.add(Activation('sigmoid')) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test)) Keras-2.2.4/examples/imdb_lstm.py0000644000000000116100000000352613342055016016470 0ustar rooteng00000000000000'''Trains an LSTM model on the IMDB sentiment classification task. The dataset is actually too small for LSTM to be of any advantage compared to simpler, much faster methods such as TF-IDF + LogReg. # Notes - RNNs are tricky. Choice of batch size is important, choice of loss and optimizer is critical, etc. Some configurations won't converge. - LSTM loss decrease patterns during training can be quite different from what you see with CNNs/MLPs/etc. ''' from __future__ import print_function from keras.preprocessing import sequence from keras.models import Sequential from keras.layers import Dense, Embedding from keras.layers import LSTM from keras.datasets import imdb max_features = 20000 # cut texts after this number of words (among top max_features most common words) maxlen = 80 batch_size = 32 print('Loading data...') (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) print(len(x_train), 'train sequences') print(len(x_test), 'test sequences') print('Pad sequences (samples x time)') x_train = sequence.pad_sequences(x_train, maxlen=maxlen) x_test = sequence.pad_sequences(x_test, maxlen=maxlen) print('x_train shape:', x_train.shape) print('x_test shape:', x_test.shape) print('Build model...') model = Sequential() model.add(Embedding(max_features, 128)) model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2)) model.add(Dense(1, activation='sigmoid')) # try using different optimizers and different optimizer configs model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) print('Train...') model.fit(x_train, 
y_train, batch_size=batch_size, epochs=15, validation_data=(x_test, y_test)) score, acc = model.evaluate(x_test, y_test, batch_size=batch_size) print('Test score:', score) print('Test accuracy:', acc) Keras-2.2.4/examples/imdb_bidirectional_lstm.py0000644000000000116100000000266113202372135021356 0ustar rooteng00000000000000'''Trains a Bidirectional LSTM on the IMDB sentiment classification task. Output after 4 epochs on CPU: ~0.8146 Time per epoch on CPU (Core i7): ~150s. ''' from __future__ import print_function import numpy as np from keras.preprocessing import sequence from keras.models import Sequential from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional from keras.datasets import imdb max_features = 20000 # cut texts after this number of words # (among top max_features most common words) maxlen = 100 batch_size = 32 print('Loading data...') (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) print(len(x_train), 'train sequences') print(len(x_test), 'test sequences') print('Pad sequences (samples x time)') x_train = sequence.pad_sequences(x_train, maxlen=maxlen) x_test = sequence.pad_sequences(x_test, maxlen=maxlen) print('x_train shape:', x_train.shape) print('x_test shape:', x_test.shape) y_train = np.array(y_train) y_test = np.array(y_test) model = Sequential() model.add(Embedding(max_features, 128, input_length=maxlen)) model.add(Bidirectional(LSTM(64))) model.add(Dropout(0.5)) model.add(Dense(1, activation='sigmoid')) # try using different optimizers and different optimizer configs model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) print('Train...') model.fit(x_train, y_train, batch_size=batch_size, epochs=4, validation_data=[x_test, y_test]) Keras-2.2.4/examples/cifar10_cnn_capsule.py0000644000000000116100000002076313342055016020317 0ustar rooteng00000000000000"""Train a simple CNN-Capsule Network on the CIFAR10 small images dataset. 
# The squashing function.
# We use 0.5 instead of the 1 used in Hinton's paper.
# If 1, the norm of the vector will be zoomed out.
# If 0.5, the norm will be zoomed in while the original norm is less than 0.5
# and be zoomed out while the original norm is greater than 0.5.
def squash(x, axis=-1):
    """Rescales `x` along `axis` by `sqrt(s) / (0.5 + s)`.

    `s` is the sum of squares of `x` along `axis` plus `K.epsilon()`.

    # Arguments
        x: Tensor to squash.
        axis: Axis along which the squared norm is computed.

    # Returns
        `x` multiplied by the (broadcast) scale factor.
    """
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# Define our own softmax function instead of K.softmax
# because K.softmax can not specify axis.
def softmax(x, axis=-1):
    """Softmax of `x` along `axis`.

    The maximum along `axis` is subtracted before exponentiation.
    """
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex / K.sum(ex, axis=axis, keepdims=True)


# Define the margin loss like hinge loss.
def margin_loss(y_true, y_pred):
    """Margin loss summed over the last axis.

    Positive targets are penalised by `relu(1 - margin - y_pred) ** 2`,
    negative targets by `lamb * relu(y_pred - margin) ** 2`, with
    `lamb = 0.5` and `margin = 0.1`.
    """
    lamb, margin = 0.5, 0.1
    return K.sum(y_true * K.square(K.relu(1 - margin - y_pred)) + lamb * (
        1 - y_true) * K.square(K.relu(y_pred - margin)), axis=-1)


class Capsule(Layer):
    """A Capsule implementation in pure Keras.

    There are two versions of Capsule. One is like a dense layer (for
    fixed-shape input), and the other is like a time-distributed dense
    layer (for variable-length input).

    The input shape of Capsule must be
    (batch_size, input_num_capsule, input_dim_capsule)
    and the output shape is
    (batch_size, num_capsule, dim_capsule).

    The Capsule implementation is from https://github.com/bojone/Capsule/
    Capsule paper: https://arxiv.org/abs/1710.09829

    # Arguments
        num_capsule: Number of output capsules.
        dim_capsule: Dimension of each output capsule.
        routings: Number of routing iterations run in `call`.
        share_weights: If True, a single kernel (leading dimension 1) is
            applied with `K.conv1d`; otherwise one kernel per input capsule
            is applied with `K.local_conv1d`.
        activation: `'squash'` selects the `squash` function defined in this
            file; anything else is resolved with `activations.get`.
    """

    def __init__(self,
                 num_capsule,
                 dim_capsule,
                 routings=3,
                 share_weights=True,
                 activation='squash',
                 **kwargs):
        super(Capsule, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.share_weights = share_weights
        if activation == 'squash':
            self.activation = squash
        else:
            self.activation = activations.get(activation)

    def build(self, input_shape):
        """Creates the projection kernel for the configured sharing mode."""
        input_dim_capsule = input_shape[-1]
        if self.share_weights:
            self.kernel = self.add_weight(
                name='capsule_kernel',
                shape=(1, input_dim_capsule,
                       self.num_capsule * self.dim_capsule),
                initializer='glorot_uniform',
                trainable=True)
        else:
            input_num_capsule = input_shape[-2]
            self.kernel = self.add_weight(
                name='capsule_kernel',
                shape=(input_num_capsule, input_dim_capsule,
                       self.num_capsule * self.dim_capsule),
                initializer='glorot_uniform',
                trainable=True)

    def call(self, inputs):
        """Runs `self.routings` routing iterations and returns the capsules.

        Follows the routing algorithm from Hinton's paper, except that the
        logits `b` are assigned from the agreement term on each iteration
        instead of being accumulated.
        This change can improve the feature representation of Capsule.

        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to realize a standard routing.
        """
        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        for i in range(self.routings):
            c = softmax(b, 1)
            o = K.batch_dot(c, hat_inputs, [2, 2])
            # The Theano reduction must run on the value just computed.
            # It used to sit before the first assignment of `o`, which
            # raised UnboundLocalError on the first iteration.
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)
            o = self.activation(o)
            if i < self.routings - 1:
                b = K.batch_dot(o, hat_inputs, [2, 3])
                # Reduce the new logits, not `o`: `o` is recomputed at the
                # top of the next iteration, so reducing it had no effect.
                # NOTE(review): Theano path not exercised here; confirm
                # against a Theano backend.
                if K.backend() == 'theano':
                    b = K.sum(b, axis=1)

        return o

    def compute_output_shape(self, input_shape):
        return (None, self.num_capsule, self.dim_capsule)
""" x = Reshape((-1, 128))(x) capsule = Capsule(10, 16, 3, True)(x) output = Lambda(lambda z: K.sqrt(K.sum(K.square(z), 2)))(capsule) model = Model(inputs=input_image, outputs=output) # we use a margin loss model.compile(loss=margin_loss, optimizer='adam', metrics=['accuracy']) model.summary() # we can compare the performance with or without data augmentation data_augmentation = True if not data_augmentation: print('Not using data augmentation.') model.fit( x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), shuffle=True) else: print('Using real-time data augmentation.') # This will do preprocessing and realtime data augmentation: datagen = ImageDataGenerator( featurewise_center=False, # set input mean to 0 over the dataset samplewise_center=False, # set each sample mean to 0 featurewise_std_normalization=False, # divide inputs by dataset std samplewise_std_normalization=False, # divide each input by its std zca_whitening=False, # apply ZCA whitening zca_epsilon=1e-06, # epsilon for ZCA whitening rotation_range=0, # randomly rotate images in 0 to 180 degrees width_shift_range=0.1, # randomly shift images horizontally height_shift_range=0.1, # randomly shift images vertically shear_range=0., # set range for random shear zoom_range=0., # set range for random zoom channel_shift_range=0., # set range for random channel shifts # set mode for filling points outside the input boundaries fill_mode='nearest', cval=0., # value used for fill_mode = "constant" horizontal_flip=True, # randomly flip images vertical_flip=False, # randomly flip images # set rescaling factor (applied before any other transformation) rescale=None, # set function that will be applied on each input preprocessing_function=None, # image data format, either "channels_first" or "channels_last" data_format=None, # fraction of images reserved for validation (strictly between 0 and 1) validation_split=0.0) # Compute quantities required for feature-wise normalization # 
def make_model(dense_layer_sizes, filters, kernel_size, pool_size):
    '''Builds and compiles a CNN: 2 convolutional layers, then dense layers.

    dense_layer_sizes: List of layer sizes.
        This list has one number for each dense layer.
    filters: Number of convolutional filters in each convolutional layer.
    kernel_size: Convolutional kernel size.
    pool_size: Size of pooling area for max pooling.

    Reads `input_shape` and `num_classes` from module scope and returns the
    compiled model.
    '''
    conv_stack = [
        Conv2D(filters, kernel_size,
               padding='valid',
               input_shape=input_shape),
        Activation('relu'),
        Conv2D(filters, kernel_size),
        Activation('relu'),
        MaxPooling2D(pool_size=pool_size),
        Dropout(0.25),
        Flatten(),
    ]

    # NOTE(review): the flattened source does not show which statements sit
    # inside the loop; one Dense + relu pair per size and a single Dropout
    # after the loop is assumed here. Confirm against the original file.
    dense_stack = []
    for units in dense_layer_sizes:
        dense_stack.append(Dense(units))
        dense_stack.append(Activation('relu'))

    head = [
        Dropout(0.5),
        Dense(num_classes),
        Activation('softmax'),
    ]

    net = Sequential()
    for layer in conv_stack + dense_stack + head:
        net.add(layer)

    net.compile(loss='categorical_crossentropy',
                optimizer='adadelta',
                metrics=['accuracy'])
    return net
Usually, the first recurrent layer of an HRNN encodes a sentence (e.g. of word vectors) into a sentence vector. The second recurrent layer then encodes a sequence of such vectors (encoded by the first layer) into a document vector. This document vector is considered to preserve both the word-level and sentence-level structure of the context. # References - [A Hierarchical Neural Autoencoder for Paragraphs and Documents] (https://arxiv.org/abs/1506.01057) Encodes paragraphs and documents with HRNN. Results have shown that HRNN outperforms standard RNNs and may play some role in more sophisticated generation tasks like summarization or question answering. - [Hierarchical recurrent neural network for skeleton based action recognition] (http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7298714) Achieved state-of-the-art results on skeleton based action recognition with 3 levels of bidirectional HRNN combined with fully connected layers. In the below MNIST example the first LSTM layer first encodes every column of pixels of shape (28, 1) to a column vector of shape (128,). The second LSTM layer encodes then these 28 column vectors of shape (28, 128) to a image vector representing the whole image. A final Dense layer is added for prediction. After 5 epochs: train acc: 0.9858, val acc: 0.9864 """ from __future__ import print_function import keras from keras.datasets import mnist from keras.models import Model from keras.layers import Input, Dense, TimeDistributed from keras.layers import LSTM # Training parameters. batch_size = 32 num_classes = 10 epochs = 5 # Embedding dimensions. row_hidden = 128 col_hidden = 128 # The data, split between train and test sets. (x_train, y_train), (x_test, y_test) = mnist.load_data() # Reshapes data to 4D for Hierarchical RNN. 
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # Converts class vectors to binary class matrices. y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) row, col, pixel = x_train.shape[1:] # 4D input. x = Input(shape=(row, col, pixel)) # Encodes a row of pixels using TimeDistributed Wrapper. encoded_rows = TimeDistributed(LSTM(row_hidden))(x) # Encodes columns of encoded rows. encoded_columns = LSTM(col_hidden)(encoded_rows) # Final predictions and model. prediction = Dense(num_classes, activation='softmax')(encoded_columns) model = Model(x, prediction) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) # Training. model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) # Evaluation. scores = model.evaluate(x_test, y_test, verbose=0) print('Test loss:', scores[0]) print('Test accuracy:', scores[1]) Keras-2.2.4/examples/neural_doodle.py0000644000000000116100000003341113342055016017326 0ustar rooteng00000000000000'''Neural doodle with Keras # Script Usage ## Arguments ``` --nlabels: # of regions (colors) in mask images --style-image: image to learn style from --style-mask: semantic labels for style image --target-mask: semantic labels for target image (your doodle) --content-image: optional image to learn content from --target-image-prefix: path prefix for generated target images ``` ## Example 1: doodle using a style image, style mask and target mask. 
``` python neural_doodle.py --nlabels 4 --style-image Monet/style.png \ --style-mask Monet/style_mask.png --target-mask Monet/target_mask.png \ --target-image-prefix generated/monet ``` ## Example 2: doodle using a style image, style mask, target mask and an optional content image. ``` python neural_doodle.py --nlabels 4 --style-image Renoir/style.png \ --style-mask Renoir/style_mask.png --target-mask Renoir/target_mask.png \ --content-image Renoir/creek.jpg \ --target-image-prefix generated/renoir ``` # References - [Dmitry Ulyanov's blog on fast-neural-doodle] (http://dmitryulyanov.github.io/feed-forward-neural-doodle/) - [Torch code for fast-neural-doodle] (https://github.com/DmitryUlyanov/fast-neural-doodle) - [Torch code for online-neural-doodle] (https://github.com/DmitryUlyanov/online-neural-doodle) - [Paper Texture Networks: Feed-forward Synthesis of Textures and Stylized Images] (http://arxiv.org/abs/1603.03417) - [Discussion on parameter tuning] (https://github.com/keras-team/keras/issues/3705) # Resources Example images can be downloaded from https://github.com/DmitryUlyanov/fast-neural-doodle/tree/master/data ''' from __future__ import print_function import time import argparse import numpy as np from scipy.optimize import fmin_l_bfgs_b from keras import backend as K from keras.layers import Input, AveragePooling2D from keras.models import Model from keras.preprocessing.image import load_img, save_img, img_to_array from keras.applications import vgg19 # Command line arguments parser = argparse.ArgumentParser(description='Keras neural doodle example') parser.add_argument('--nlabels', type=int, help='number of semantic labels' ' (regions in differnet colors)' ' in style_mask/target_mask') parser.add_argument('--style-image', type=str, help='path to image to learn style from') parser.add_argument('--style-mask', type=str, help='path to semantic mask of style image') parser.add_argument('--target-mask', type=str, help='path to semantic mask of target 
# helper functions for reading/processing images
def preprocess_image(image_path):
    """Loads an image at (img_nrows, img_ncols) as a VGG19-preprocessed batch.

    The array gets a leading axis of size 1 before
    `vgg19.preprocess_input` is applied.
    """
    pixels = img_to_array(
        load_img(image_path, target_size=(img_nrows, img_ncols)))
    batch = np.expand_dims(pixels, axis=0)
    return vgg19.preprocess_input(batch)


def deprocess_image(x):
    """Turns a flat generated array back into an (rows, cols, 3) uint8 image.

    The per-channel offsets are added in place on the reshaped array, so
    callers that need `x` untouched should pass a copy.
    """
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols)).transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # Remove zero-center by mean pixel
    for channel, mean_pixel in enumerate((103.939, 116.779, 123.68)):
        x[:, :, channel] += mean_pixel
    # 'BGR'->'RGB'
    rgb = x[:, :, ::-1]
    return np.clip(rgb, 0, 255).astype('uint8')


def kmeans(xs, k):
    """Clusters the rows of the 2D array `xs` into `k` groups.

    Uses `sklearn.cluster.k_means` when it can be imported and falls back
    to `scipy.cluster.vq.kmeans2` otherwise. Returns the label array.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        assignments = k_means(xs.astype('float64'), k)[1]
    except ImportError:
        from scipy.cluster.vq import kmeans2
        assignments = kmeans2(xs, k, missing='raise')[1]
    return assignments


def load_mask_labels():
    '''Load both target and style masks.

    A mask image (nr x nc) with m labels/colors will be loaded
    as a 4D boolean tensor:
        (1, m, nr, nc) for 'channels_first' or (1, nr, nc, m) for 'channels_last'

    Returns the pair (style_mask, target_mask).
    '''
    size = (img_nrows, img_ncols)
    target_pixels = img_to_array(load_img(target_mask_path, target_size=size))
    style_pixels = img_to_array(load_img(style_mask_path, target_size=size))

    channels_first = K.image_data_format() == 'channels_first'
    if channels_first:
        style_rows = style_pixels.reshape((3, -1)).T
        target_rows = target_pixels.reshape((3, -1)).T
    else:
        style_rows = style_pixels.reshape((-1, 3))
        target_rows = target_pixels.reshape((-1, 3))

    # Both masks are clustered together so they share one label numbering.
    labels = kmeans(np.vstack([style_rows, target_rows]), num_labels)

    n_pixels = img_nrows * img_ncols
    style_labels = labels[:n_pixels].reshape(size)
    target_labels = labels[n_pixels:].reshape(size)

    label_axis = 0 if channels_first else -1
    style_planes = [style_labels == r for r in range(num_labels)]
    target_planes = [target_labels == r for r in range(num_labels)]
    style_mask = np.stack(style_planes, axis=label_axis)
    target_mask = np.stack(target_planes, axis=label_axis)

    return (np.expand_dims(style_mask, axis=0),
            np.expand_dims(target_mask, axis=0))
# Define loss functions
def gram_matrix(x):
    """Returns the product of the batch-flattened `x` with its transpose."""
    assert K.ndim(x) == 3
    flat = K.batch_flatten(x)
    return K.dot(flat, K.transpose(flat))


def region_style_loss(style_image, target_image, style_mask, target_mask):
    '''Calculate style loss between style_image and target_image,
    for one common region specified by their (boolean) masks.

    Each Gram matrix is divided by the mean of its mask and by the channel
    count before the mean squared difference is taken.
    '''
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 2 == K.ndim(style_mask) == K.ndim(target_mask)

    if K.image_data_format() == 'channels_first':
        channel_axis = 0
        masked_style = style_image * style_mask
        masked_target = target_image * target_mask
    else:
        channel_axis = -1
        # Move channels to the front so the 2D mask broadcasts over them.
        masked_style = K.permute_dimensions(
            style_image, (2, 0, 1)) * style_mask
        masked_target = K.permute_dimensions(
            target_image, (2, 0, 1)) * target_mask

    num_channels = K.cast(K.shape(style_image)[channel_axis],
                          dtype='float32')
    style_gram = gram_matrix(masked_style) / K.mean(style_mask) / num_channels
    target_gram = (gram_matrix(masked_target) / K.mean(target_mask) /
                   num_channels)
    return K.mean(K.square(style_gram - target_gram))


def style_loss(style_image, target_image, style_masks, target_masks):
    '''Calculate style loss between style_image and target_image,
    in all regions.

    Sums `region_style_loss` over the `num_labels` mask planes.
    '''
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 3 == K.ndim(style_masks) == K.ndim(target_masks)

    total = K.variable(0)
    for label in range(num_labels):
        if K.image_data_format() == 'channels_first':
            region_style = style_masks[label, :, :]
            region_target = target_masks[label, :, :]
        else:
            region_style = style_masks[:, :, label]
            region_target = target_masks[:, :, label]
        total += region_style_loss(style_image, target_image,
                                   region_style, region_target)
    return total


def content_loss(content_image, target_image):
    """Sum of squared differences between target and content features."""
    difference = target_image - content_image
    return K.sum(K.square(difference))


def total_variation_loss(x):
    """Sums `(dy**2 + dx**2) ** 1.25` over neighbouring-pixel differences."""
    assert 4 == K.ndim(x)
    last_row = img_nrows - 1
    last_col = img_ncols - 1
    if K.image_data_format() == 'channels_first':
        base = x[:, :, :last_row, :last_col]
        row_shifted = x[:, :, 1:, :last_col]
        col_shifted = x[:, :, :last_row, 1:]
    else:
        base = x[:, :last_row, :last_col, :]
        row_shifted = x[:, 1:, :last_col, :]
        col_shifted = x[:, :last_row, 1:, :]
    a = K.square(base - row_shifted)
    b = K.square(base - col_shifted)
    return K.sum(K.pow(a + b, 1.25))
loss = K.variable(0)
for layer in content_feature_layers:
    content_feat = image_features[layer][CONTENT, :, :, :]
    target_feat = image_features[layer][TARGET, :, :, :]
    loss += content_weight * content_loss(content_feat, target_feat)

for layer in style_feature_layers:
    style_feat = image_features[layer][STYLE, :, :, :]
    target_feat = image_features[layer][TARGET, :, :, :]
    style_masks = mask_features[layer][STYLE, :, :, :]
    target_masks = mask_features[layer][TARGET, :, :, :]
    sl = style_loss(style_feat, target_feat, style_masks, target_masks)
    # Every style layer gets an equal share of style_weight.
    loss += (style_weight / len(style_feature_layers)) * sl

loss += total_variation_weight * total_variation_loss(target_image)
loss_grads = K.gradients(loss, target_image)

# Evaluator class for computing efficiency
outputs = [loss]
# loss_grads may be a single tensor or a list/tuple of tensors.
if isinstance(loss_grads, (list, tuple)):
    outputs += loss_grads
else:
    outputs.append(loss_grads)

f_outputs = K.function([target_image], outputs)


def eval_loss_and_grads(x):
    """Evaluates the loss and its gradient for a flat image vector.

    # Arguments
        x: Array that is reshaped to the (1, 3, rows, cols) or
            (1, rows, cols, 3) layout, depending on the image data format.

    # Returns
        Tuple `(loss_value, grad_values)`; the gradient is flattened and
        cast to float64.
    """
    if K.image_data_format() == 'channels_first':
        x = x.reshape((1, 3, img_nrows, img_ncols))
    else:
        x = x.reshape((1, img_nrows, img_ncols, 3))
    outs = f_outputs([x])
    loss_value = outs[0]
    if len(outs[1:]) == 1:
        grad_values = outs[1].flatten().astype('float64')
    else:
        grad_values = np.array(outs[1:]).flatten().astype('float64')
    return loss_value, grad_values


class Evaluator(object):
    """Exposes loss and gradient as two callables backed by one evaluation.

    `loss(x)` runs `eval_loss_and_grads` once and caches both results;
    `grads(x)` hands back the cached gradient and clears the cache. The two
    methods must therefore be called in strict loss -> grads alternation,
    which the assertions enforce.
    """

    def __init__(self):
        self.loss_value = None
        # Same attribute name that loss() and grads() read and write.
        # __init__ previously set a misspelled, never-used `grads_values`.
        self.grad_values = None

    def loss(self, x):
        """Computes and caches loss and gradient at `x`; returns the loss."""
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        """Returns a copy of the cached gradient and resets the cache.

        `x` is not used: the gradient comes from the preceding `loss` call.
        """
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values


evaluator = Evaluator()

# Generate images by iterative optimization
if K.image_data_format() == 'channels_first':
    x = np.random.uniform(0, 255, (1, 3, img_nrows, img_ncols)) - 128.
else:
    x = np.random.uniform(0, 255, (1, img_nrows, img_ncols, 3)) - 128.
for i in range(50): print('Start of iteration', i) start_time = time.time() x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.grads, maxfun=20) print('Current loss value:', min_val) # save current generated image img = deprocess_image(x.copy()) fname = target_img_prefix + '_at_iteration_%d.png' % i save_img(fname, img) end_time = time.time() print('Image saved as', fname) print('Iteration %d completed in %ds' % (i, end_time - start_time)) Keras-2.2.4/examples/lstm_stateful.py0000644000000000116100000001731413227311000017372 0ustar rooteng00000000000000'''Example script showing how to use a stateful LSTM model and how its stateless counterpart performs. More documentation about the Keras LSTM model can be found at https://keras.io/layers/recurrent/#lstm The models are trained on an input/output pair, where the input is a generated uniformly distributed random sequence of length = "input_len", and the output is a moving average of the input with window length = "tsteps". Both "input_len" and "tsteps" are defined in the "editable parameters" section. A larger "tsteps" value means that the LSTM will need more memory to figure out the input-output relationship. This memory length is controlled by the "lahead" variable (more details below). The rest of the parameters are: - input_len: the length of the generated input sequence - lahead: the input sequence length that the LSTM is trained on for each output point - batch_size, epochs: same parameters as in the model.fit(...) function When lahead > 1, the model input is preprocessed to a "rolling window view" of the data, with the window length = "lahead". This is similar to sklearn's "view_as_windows" with "window_shape" being a single number Ref: http://scikit-image.org/docs/0.10.x/api/skimage.util.html#view-as-windows When lahead < tsteps, only the stateful LSTM converges because its statefulness allows it to see beyond the capability that lahead gave it to fit the n-point average. 
def gen_uniform_amp(amp=1, xn=10000):
    """Generates uniform random data between
    -amp and +amp
    and of length xn

    Arguments:
        amp: maximum/minimum range of uniform data
        xn: length of series

    Returns:
        A single-column pandas DataFrame holding the `xn` samples.
    """
    samples = np.random.uniform(low=-1 * amp, high=+1 * amp, size=xn)
    return pd.DataFrame(samples)
# For aesthetic reasons,
# in order to maintain generated data length = input_len after pre-processing,
# add a few points to account for the values that will be lost.
to_drop = max(tsteps - 1, lahead - 1)
data_input = gen_uniform_amp(amp=0.1, xn=input_len + to_drop)

# set the target to be a N-point average of the input
expected_output = data_input.rolling(window=tsteps, center=False).mean()

# when lahead > 1, need to convert the input to "rolling window view"
# https://docs.scipy.org/doc/numpy/reference/generated/numpy.repeat.html
if lahead > 1:
    # Duplicate the single input column `lahead` times, then shift column i
    # down by i rows so that each row holds the current value together with
    # the (lahead - 1) values that precede it.
    data_input = np.repeat(data_input.values, repeats=lahead, axis=1)
    data_input = pd.DataFrame(data_input)
    for i, c in enumerate(data_input.columns):
        data_input[c] = data_input[c].shift(i)

# drop the leading rows that contain NaN (introduced by the rolling mean
# and by the column shifts above)
expected_output = expected_output[to_drop:]
data_input = data_input[to_drop:]

print('Input shape:', data_input.shape)
print('Output shape:', expected_output.shape)
print('Input head: ')
print(data_input.head())
print('Output head: ')
print(expected_output.head())
print('Input tail: ')
print(data_input.tail())
print('Output tail: ')
print(expected_output.tail())

# Plot the first 10 rows of column 0 of the input and of the target.
print('Plotting input and expected output')
plt.plot(data_input[0][:10], '.')
plt.plot(expected_output[0][:10], '-')
plt.legend(['Input', 'Expected output'])
plt.title('Input')
plt.show()


def create_model(stateful):
    """Build and compile the LSTM regression model used by this script.

    The model is an `LSTM` layer with 20 units followed by `Dense(1)`,
    compiled with mean squared error loss and the Adam optimizer. The input
    shape `(lahead, 1)` and the fixed `batch_size` are read from the
    module-level parameters.

    # Arguments
        stateful: value forwarded to the `stateful` argument of the
            `LSTM` layer.

    # Returns
        The compiled `Sequential` model.
    """
    model = Sequential()
    model.add(LSTM(20,
              input_shape=(lahead, 1),
              batch_size=batch_size,
              stateful=stateful))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    return model

print('Creating Stateful Model...')
model_stateful = create_model(stateful=True)


# split train/test data
def split_data(x, y, ratio=0.8):
    """Split `x` and `y` into train and test parts and reshape them.

    The split index is `int(input_len * ratio)` rounded down to a multiple
    of `batch_size`; both values are read from the module-level parameters,
    not derived from `x`. The test part is then shortened by
    `x.shape[0] % batch_size` rows.

    # Arguments
        x: input data; it is sliced by row, and `.values`, `.shape[0]` and
            `.shape[1]` are read from it (the script passes a DataFrame).
        y: target data, sliced with the same row indices as `x`.
        ratio: fraction of `input_len` used to place the split index.

    # Returns
        `(x_train, y_train), (x_test, y_test)` where the `x` arrays are
        reshaped to `(rows, columns, 1)` and the `y` arrays to `(rows, 1)`.
    """
    to_train = int(input_len * ratio)
    # tweak to match with batch_size
    to_train -= to_train % batch_size

    x_train = x[:to_train]
    y_train = y[:to_train]
    x_test = x[to_train:]
    y_test = y[to_train:]

    # tweak to match with batch_size
    # (to_train is already a multiple of batch_size, so the remainder of the
    # whole series equals the remainder of the test part)
    to_drop = x.shape[0] % batch_size
    if to_drop > 0:
        x_test = x_test[:-1 * to_drop]
        y_test = y_test[:-1 * to_drop]

    # some reshaping
    # inputs: add a trailing axis of size 1 after the column axis
    reshape_3 = lambda x: x.values.reshape((x.shape[0], x.shape[1], 1))
    x_train = reshape_3(x_train)
    x_test = reshape_3(x_test)

    # targets: one value per row
    reshape_2 = lambda x: x.values.reshape((x.shape[0], 1))
    y_train = reshape_2(y_train)
    y_test = reshape_2(y_test)

    return (x_train, y_train), (x_test, y_test)


(x_train, y_train), (x_test, y_test) = split_data(data_input, expected_output)
print('x_train.shape: ', x_train.shape)
print('y_train.shape: ', y_train.shape)
print('x_test.shape: ', x_test.shape)
print('y_test.shape: ', y_test.shape)

print('Training')
# The stateful model is trained one epoch per fit() call so that its states
# can be reset after every pass over the training data.
for i in range(epochs):
    print('Epoch', i + 1, '/', epochs)
    # Note that the last state for sample i in a batch will
    # be used as initial state for sample i in the next batch.
    # Thus we are simultaneously training on batch_size series with
    # lower resolution than the original series contained in data_input.
    # Each of these series are offset by one step and can be
    # extracted with data_input[i::batch_size].
    model_stateful.fit(x_train,
                       y_train,
                       batch_size=batch_size,
                       epochs=1,
                       verbose=1,
                       validation_data=(x_test, y_test),
                       shuffle=False)
    model_stateful.reset_states()

print('Predicting')
predicted_stateful = model_stateful.predict(x_test, batch_size=batch_size)

# The stateless counterpart is trained with a single fit() call for the
# same number of epochs, on the same unshuffled data.
print('Creating Stateless Model...')
model_stateless = create_model(stateful=False)

print('Training')
model_stateless.fit(x_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test),
                    shuffle=False)

print('Predicting')
predicted_stateless = model_stateless.predict(x_test, batch_size=batch_size)

# ----------------------------

# Three stacked panels: the expected output, then the residual
# (expected - predicted) of the stateful and of the stateless model.
print('Plotting Results')
plt.subplot(3, 1, 1)
plt.plot(y_test)
plt.title('Expected')
plt.subplot(3, 1, 2)
# drop the first "tsteps-1" because it is not possible to predict them
# since the "previous" timesteps to use do not exist
plt.plot((y_test - predicted_stateful).flatten()[tsteps - 1:])
plt.title('Stateful: Expected - Predicted')
plt.subplot(3, 1, 3)
plt.plot((y_test - predicted_stateless).flatten())
plt.title('Stateless: Expected - Predicted') plt.show() Keras-2.2.4/examples/tensorboard_embeddings_mnist.py0000644000000000116100000000611613305602621022430 0ustar rooteng00000000000000'''Trains a simple convnet on the MNIST dataset and embeds test data. The test data is embedded using the weights of the final dense layer, just before the classification head. This embedding can then be visualized using TensorBoard's Embedding Projector. ''' from __future__ import print_function from os import makedirs from os.path import exists, join import keras from keras.callbacks import TensorBoard from keras.datasets import mnist from keras.models import Sequential from keras.layers import Dense, Dropout, Flatten from keras.layers import Conv2D, MaxPooling2D from keras import backend as K import numpy as np batch_size = 128 num_classes = 10 epochs = 12 log_dir = './logs' if not exists(log_dir): makedirs(log_dir) # input image dimensions img_rows, img_cols = 28, 28 # the data, split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() if K.image_data_format() == 'channels_first': x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) input_shape = (1, img_rows, img_cols) else: x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) input_shape = (img_rows, img_cols, 1) x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # save class labels to disk to color data points in TensorBoard accordingly with open(join(log_dir, 'metadata.tsv'), 'w') as f: np.savetxt(f, y_test) # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) 
tensorboard = TensorBoard(batch_size=batch_size, embeddings_freq=1, embeddings_layer_names=['features'], embeddings_metadata='metadata.tsv', embeddings_data=x_test) model = Sequential() model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape)) model.add(Conv2D(64, (3, 3), activation='relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(128, activation='relu', name='features')) model.add(Dropout(0.5)) model.add(Dense(num_classes, activation='softmax')) model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy']) model.fit(x_train, y_train, batch_size=batch_size, callbacks=[tensorboard], epochs=epochs, verbose=1, validation_data=(x_test, y_test)) score = model.evaluate(x_test, y_test, verbose=0) print('Test loss:', score[0]) print('Test accuracy:', score[1]) # You can now launch tensorboard with `tensorboard --logdir=./logs` on your # command line and then go to http://localhost:6006/#projector to view the # embeddings Keras-2.2.4/examples/cifar10_resnet.py0000644000000000116100000003576513312516314017335 0ustar rooteng00000000000000"""Trains a ResNet on the CIFAR10 dataset. 
ResNet v1 [a] Deep Residual Learning for Image Recognition https://arxiv.org/pdf/1512.03385.pdf ResNet v2 [b] Identity Mappings in Deep Residual Networks https://arxiv.org/pdf/1603.05027.pdf """ from __future__ import print_function import keras from keras.layers import Dense, Conv2D, BatchNormalization, Activation from keras.layers import AveragePooling2D, Input, Flatten from keras.optimizers import Adam from keras.callbacks import ModelCheckpoint, LearningRateScheduler from keras.callbacks import ReduceLROnPlateau from keras.preprocessing.image import ImageDataGenerator from keras.regularizers import l2 from keras import backend as K from keras.models import Model from keras.datasets import cifar10 import numpy as np import os # Training parameters batch_size = 32 # orig paper trained all networks with batch_size=128 epochs = 200 data_augmentation = True num_classes = 10 # Subtracting pixel mean improves accuracy subtract_pixel_mean = True # Model parameter # ---------------------------------------------------------------------------- # | | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch # Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti # |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2) # ---------------------------------------------------------------------------- # ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---) # ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA) # ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA) # ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100) # ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180) # ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---) # ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---) # --------------------------------------------------------------------------- n = 3 # Model version # Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2) version = 1 # Computed depth from supplied model parameter n 
if version == 1:
    depth = n * 6 + 2
elif version == 2:
    depth = n * 9 + 2
else:
    # Fail early with a clear message; previously `depth` was simply left
    # undefined and the script died with a NameError a few lines below.
    raise ValueError('version should be 1 (ResNet v1) or 2 (ResNet v2), '
                     'got %r' % (version,))

# Model name, depth and version
model_type = 'ResNet%dv%d' % (depth, version)

# Load the CIFAR10 data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Input image dimensions.
input_shape = x_train.shape[1:]

# Normalize data.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# If subtract pixel mean is enabled
if subtract_pixel_mean:
    # The mean is computed on the training set only and applied to both sets.
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


def lr_schedule(epoch):
    """Learning Rate Schedule

    Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
    Called automatically every epoch as part of callbacks during training.

    The base rate is 1e-3; it is multiplied by 1e-1 for epoch > 80,
    1e-2 for epoch > 120, 1e-3 for epoch > 160 and 0.5e-3 for epoch > 180.
    The selected rate is also printed.

    # Arguments
        epoch (int): The epoch number the rate is requested for

    # Returns
        lr (float): learning rate
    """
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr


def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True):
    """2D Convolution-Batch Normalization-Activation stack builder

    The convolution always uses 'same' padding, 'he_normal' kernel
    initialization and L2 kernel regularization with factor 1e-4.

    # Arguments
        inputs (tensor): input tensor from input image or previous layer
        num_filters (int): Conv2D number of filters
        kernel_size (int): Conv2D square kernel dimensions
        strides (int): Conv2D square stride dimensions
        activation (string): activation name, or None to skip the activation
        batch_normalization (bool): whether to include batch normalization
        conv_first (bool): conv-bn-activation (True) or
            bn-activation-conv (False)

    # Returns
        x (tensor): tensor as input to the next layer
    """
    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))

    x = inputs
    if conv_first:
        x = conv(x)
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
    else:
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
        x = conv(x)
    return x


def resnet_v1(input_shape, depth, num_classes=10):
    """ResNet Version 1 Model builder [a]

    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
    Last ReLU is after the shortcut connection.
    At the beginning of each stage, the feature map size is halved
    (downsampled) by a convolutional layer with strides=2, while the number
    of filters is doubled. Within each stage, the layers have the same
    number of filters and the same feature map sizes.
    Features maps sizes:
    stage 0: 32x32, 16
    stage 1: 16x16, 32
    stage 2:  8x8,  64
    The Number of parameters is approx the same as Table 6 of [a]:
    ResNet20 0.27M
    ResNet32 0.46M
    ResNet44 0.66M
    ResNet56 0.85M
    ResNet110 1.7M

    # Arguments
        input_shape (tuple): shape of input image tensor
        depth (int): number of core convolutional layers, must be 6n+2
        num_classes (int): number of classes (CIFAR10 has 10)

    # Returns
        model (Model): Keras model instance

    # Raises
        ValueError: if `depth` is not of the form 6n+2
    """
    if (depth - 2) % 6 != 0:
        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
    # Start model definition.
    num_filters = 16
    # Exact because of the check above; integer division keeps this an int.
    num_res_blocks = (depth - 2) // 6

    inputs = Input(shape=input_shape)
    x = resnet_layer(inputs=inputs)
    # Instantiate the stack of residual units
    for stack in range(3):
        for res_block in range(num_res_blocks):
            strides = 1
            if stack > 0 and res_block == 0:  # first layer but not first stack
                strides = 2  # downsample
            y = resnet_layer(inputs=x,
                             num_filters=num_filters,
                             strides=strides)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters,
                             activation=None)
            if stack > 0 and res_block == 0:  # first layer but not first stack
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])
            x = Activation('relu')(x)
        num_filters *= 2

    # Add classifier on top.
    # v1 does not use BN after last shortcut connection-ReLU
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model


def resnet_v2(input_shape, depth, num_classes=10):
    """ResNet Version 2 Model builder [b]

    Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as
    bottleneck layer
    First shortcut connection per layer is 1 x 1 Conv2D.
    Second and onwards shortcut connection is identity.
    At the beginning of each stage, the feature map size is halved
    (downsampled) by a convolutional layer with strides=2, while the number
    of filter maps is doubled. Within each stage, the layers have the same
    number of filters and the same filter map sizes.
    Features maps sizes:
    conv1  : 32x32,  16
    stage 0: 32x32,  64
    stage 1: 16x16, 128
    stage 2:  8x8,  256

    # Arguments
        input_shape (tuple): shape of input image tensor
        depth (int): number of core convolutional layers, must be 9n+2
        num_classes (int): number of classes (CIFAR10 has 10)

    # Returns
        model (Model): Keras model instance

    # Raises
        ValueError: if `depth` is not of the form 9n+2
    """
    if (depth - 2) % 9 != 0:
        raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
    # Start model definition.
    num_filters_in = 16
    # Exact because of the check above; integer division keeps this an int.
    num_res_blocks = (depth - 2) // 9

    inputs = Input(shape=input_shape)
    # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
    x = resnet_layer(inputs=inputs,
                     num_filters=num_filters_in,
                     conv_first=True)

    # Instantiate the stack of residual units
    for stage in range(3):
        for res_block in range(num_res_blocks):
            activation = 'relu'
            batch_normalization = True
            strides = 1
            if stage == 0:
                num_filters_out = num_filters_in * 4
                if res_block == 0:  # first layer and first stage
                    activation = None
                    batch_normalization = False
            else:
                num_filters_out = num_filters_in * 2
                if res_block == 0:  # first layer but not first stage
                    strides = 2    # downsample

            # bottleneck residual unit
            y = resnet_layer(inputs=x,
                             num_filters=num_filters_in,
                             kernel_size=1,
                             strides=strides,
                             activation=activation,
                             batch_normalization=batch_normalization,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_in,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_out,
                             kernel_size=1,
                             conv_first=False)
            if res_block == 0:
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters_out,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])

        num_filters_in = num_filters_out

    # Add classifier on top.
    # v2 has BN-ReLU before Pooling
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model


# Pass num_classes explicitly so the output layer always matches the one-hot
# labels built above, even if the script-level setting is changed.
if version == 2:
    model = resnet_v2(input_shape=input_shape,
                      depth=depth,
                      num_classes=num_classes)
else:
    model = resnet_v1(input_shape=input_shape,
                      depth=depth,
                      num_classes=num_classes)

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=lr_schedule(0)),
              metrics=['accuracy'])
model.summary()
print(model_type)

# Prepare model saving directory.
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)

lr_scheduler = LearningRateScheduler(lr_schedule)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

callbacks = [checkpoint, lr_reducer, lr_scheduler]

# Run training, with or without data augmentation.
if data_augmentation:
    print('Using real-time data augmentation.')
    # Settings for preprocessing and realtime data augmentation.
    augmentation_options = dict(
        # set input mean to 0 over the dataset
        featurewise_center=False,
        # set each sample mean to 0
        samplewise_center=False,
        # divide inputs by std of dataset
        featurewise_std_normalization=False,
        # divide each input by its std
        samplewise_std_normalization=False,
        # apply ZCA whitening
        zca_whitening=False,
        # epsilon for ZCA whitening
        zca_epsilon=1e-06,
        # randomly rotate images in the range (deg 0 to 180)
        rotation_range=0,
        # randomly shift images horizontally
        width_shift_range=0.1,
        # randomly shift images vertically
        height_shift_range=0.1,
        # set range for random shear
        shear_range=0.,
        # set range for random zoom
        zoom_range=0.,
        # set range for random channel shifts
        channel_shift_range=0.,
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        # value used for fill_mode = "constant"
        cval=0.,
        # randomly flip images horizontally
        horizontal_flip=True,
        # randomly flip images vertically
        vertical_flip=False,
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation
        # (strictly between 0 and 1)
        validation_split=0.0,
    )
    datagen = ImageDataGenerator(**augmentation_options)

    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Train on the augmented batches produced by the generator.
    train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
    model.fit_generator(train_batches,
                        validation_data=(x_test, y_test),
                        epochs=epochs,
                        verbose=1,
                        workers=4,
                        callbacks=callbacks)
else:
    print('Not using data augmentation.')
    # Train directly on the in-memory arrays.
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True,
              callbacks=callbacks)

# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1) print('Test loss:', scores[0]) print('Test accuracy:', scores[1]) Keras-2.2.4/examples/variational_autoencoder.py0000644000000000116100000001512313342055016021413 0ustar rooteng00000000000000'''Example of VAE on MNIST dataset using MLP The VAE has a modular design. The encoder, decoder and VAE are 3 models that share weights. After training the VAE model, the encoder can be used to generate latent vectors. The decoder can be used to generate MNIST digits by sampling the latent vector from a Gaussian distribution with mean=0 and std=1. # Reference [1] Kingma, Diederik P., and Max Welling. "Auto-encoding variational bayes." https://arxiv.org/abs/1312.6114 ''' from __future__ import absolute_import from __future__ import division from __future__ import print_function from keras.layers import Lambda, Input, Dense from keras.models import Model from keras.datasets import mnist from keras.losses import mse, binary_crossentropy from keras.utils import plot_model from keras import backend as K import numpy as np import matplotlib.pyplot as plt import argparse import os # reparameterization trick # instead of sampling from Q(z|X), sample eps = N(0,I) # z = z_mean + sqrt(var)*eps def sampling(args): """Reparameterization trick by sampling fr an isotropic unit Gaussian. 
# Arguments: args (tensor): mean and log of variance of Q(z|X) # Returns: z (tensor): sampled latent vector """ z_mean, z_log_var = args batch = K.shape(z_mean)[0] dim = K.int_shape(z_mean)[1] # by default, random_normal has mean=0 and std=1.0 epsilon = K.random_normal(shape=(batch, dim)) return z_mean + K.exp(0.5 * z_log_var) * epsilon def plot_results(models, data, batch_size=128, model_name="vae_mnist"): """Plots labels and MNIST digits as function of 2-dim latent vector # Arguments: models (tuple): encoder and decoder models data (tuple): test data and label batch_size (int): prediction batch size model_name (string): which model is using this function """ encoder, decoder = models x_test, y_test = data os.makedirs(model_name, exist_ok=True) filename = os.path.join(model_name, "vae_mean.png") # display a 2D plot of the digit classes in the latent space z_mean, _, _ = encoder.predict(x_test, batch_size=batch_size) plt.figure(figsize=(12, 10)) plt.scatter(z_mean[:, 0], z_mean[:, 1], c=y_test) plt.colorbar() plt.xlabel("z[0]") plt.ylabel("z[1]") plt.savefig(filename) plt.show() filename = os.path.join(model_name, "digits_over_latent.png") # display a 30x30 2D manifold of digits n = 30 digit_size = 28 figure = np.zeros((digit_size * n, digit_size * n)) # linearly spaced coordinates corresponding to the 2D plot # of digit classes in the latent space grid_x = np.linspace(-4, 4, n) grid_y = np.linspace(-4, 4, n)[::-1] for i, yi in enumerate(grid_y): for j, xi in enumerate(grid_x): z_sample = np.array([[xi, yi]]) x_decoded = decoder.predict(z_sample) digit = x_decoded[0].reshape(digit_size, digit_size) figure[i * digit_size: (i + 1) * digit_size, j * digit_size: (j + 1) * digit_size] = digit plt.figure(figsize=(10, 10)) start_range = digit_size // 2 end_range = n * digit_size + start_range + 1 pixel_range = np.arange(start_range, end_range, digit_size) sample_range_x = np.round(grid_x, 1) sample_range_y = np.round(grid_y, 1) plt.xticks(pixel_range, sample_range_x) 
plt.yticks(pixel_range, sample_range_y) plt.xlabel("z[0]") plt.ylabel("z[1]") plt.imshow(figure, cmap='Greys_r') plt.savefig(filename) plt.show() # MNIST dataset (x_train, y_train), (x_test, y_test) = mnist.load_data() image_size = x_train.shape[1] original_dim = image_size * image_size x_train = np.reshape(x_train, [-1, original_dim]) x_test = np.reshape(x_test, [-1, original_dim]) x_train = x_train.astype('float32') / 255 x_test = x_test.astype('float32') / 255 # network parameters input_shape = (original_dim, ) intermediate_dim = 512 batch_size = 128 latent_dim = 2 epochs = 50 # VAE model = encoder + decoder # build encoder model inputs = Input(shape=input_shape, name='encoder_input') x = Dense(intermediate_dim, activation='relu')(inputs) z_mean = Dense(latent_dim, name='z_mean')(x) z_log_var = Dense(latent_dim, name='z_log_var')(x) # use reparameterization trick to push the sampling out as input # note that "output_shape" isn't necessary with the TensorFlow backend z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var]) # instantiate encoder model encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder') encoder.summary() plot_model(encoder, to_file='vae_mlp_encoder.png', show_shapes=True) # build decoder model latent_inputs = Input(shape=(latent_dim,), name='z_sampling') x = Dense(intermediate_dim, activation='relu')(latent_inputs) outputs = Dense(original_dim, activation='sigmoid')(x) # instantiate decoder model decoder = Model(latent_inputs, outputs, name='decoder') decoder.summary() plot_model(decoder, to_file='vae_mlp_decoder.png', show_shapes=True) # instantiate VAE model outputs = decoder(encoder(inputs)[2]) vae = Model(inputs, outputs, name='vae_mlp') if __name__ == '__main__': parser = argparse.ArgumentParser() help_ = "Load h5 model trained weights" parser.add_argument("-w", "--weights", help=help_) help_ = "Use mse loss instead of binary cross entropy (default)" parser.add_argument("-m", "--mse", help=help_, 
action='store_true') args = parser.parse_args() models = (encoder, decoder) data = (x_test, y_test) # VAE loss = mse_loss or xent_loss + kl_loss if args.mse: reconstruction_loss = mse(inputs, outputs) else: reconstruction_loss = binary_crossentropy(inputs, outputs) reconstruction_loss *= original_dim kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) kl_loss = K.sum(kl_loss, axis=-1) kl_loss *= -0.5 vae_loss = K.mean(reconstruction_loss + kl_loss) vae.add_loss(vae_loss) vae.compile(optimizer='adam') vae.summary() plot_model(vae, to_file='vae_mlp.png', show_shapes=True) if args.weights: vae.load_weights(args.weights) else: # train the autoencoder vae.fit(x_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, None)) vae.save_weights('vae_mlp_mnist.h5') plot_results(models, data, batch_size=batch_size, model_name="vae_mlp") Keras-2.2.4/examples/babi_rnn.py0000644000000000116100000002154413326715636016305 0ustar rooteng00000000000000'''Trains two recurrent neural networks based upon a story and a question. The resulting merged vector is then queried to answer a range of bAbI tasks. The results are comparable to those for an LSTM model provided in Weston et al.: "Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks" http://arxiv.org/abs/1502.05698 Task Number | FB LSTM Baseline | Keras QA --- | --- | --- QA1 - Single Supporting Fact | 50 | 100.0 QA2 - Two Supporting Facts | 20 | 50.0 QA3 - Three Supporting Facts | 20 | 20.5 QA4 - Two Arg. Relations | 61 | 62.9 QA5 - Three Arg. 
Relations | 70 | 61.9 QA6 - yes/No Questions | 48 | 50.7 QA7 - Counting | 49 | 78.9 QA8 - Lists/Sets | 45 | 77.2 QA9 - Simple Negation | 64 | 64.0 QA10 - Indefinite Knowledge | 44 | 47.7 QA11 - Basic Coreference | 72 | 74.9 QA12 - Conjunction | 74 | 76.4 QA13 - Compound Coreference | 94 | 94.4 QA14 - Time Reasoning | 27 | 34.8 QA15 - Basic Deduction | 21 | 32.4 QA16 - Basic Induction | 23 | 50.6 QA17 - Positional Reasoning | 51 | 49.1 QA18 - Size Reasoning | 52 | 90.8 QA19 - Path Finding | 8 | 9.0 QA20 - Agent's Motivations | 91 | 90.7 For the resources related to the bAbI project, refer to: https://research.facebook.com/researchers/1543934539189348 # Notes - With default word, sentence, and query vector sizes, the GRU model achieves: - 100% test accuracy on QA1 in 20 epochs (2 seconds per epoch on CPU) - 50% test accuracy on QA2 in 20 epochs (16 seconds per epoch on CPU) In comparison, the Facebook paper achieves 50% and 20% for the LSTM baseline. - The task does not traditionally parse the question separately. This likely improves accuracy and is a good example of merging two RNNs. - The word vector embeddings are not shared between the story and question RNNs. - See how the accuracy changes given 10,000 training samples (en-10k) instead of only 1000. 1000 was used in order to be comparable to the original paper. - Experiment with GRU, LSTM, and JZS1-3 as they give subtly different results. - The length and noise (i.e. 'useless' story components) impact the ability for LSTMs / GRUs to provide the correct answer. Given only the supporting facts, these RNNs can achieve 100% accuracy on many tasks. Memory networks and neural networks that use attentional processes can efficiently search through this noise to find the relevant statements, improving performance substantially. This becomes especially obvious on QA2 and QA3, both far longer than QA1. 
''' from __future__ import print_function from functools import reduce import re import tarfile import numpy as np from keras.utils.data_utils import get_file from keras.layers.embeddings import Embedding from keras import layers from keras.layers import recurrent from keras.models import Model from keras.preprocessing.sequence import pad_sequences def tokenize(sent): '''Return the tokens of a sentence including punctuation. >>> tokenize('Bob dropped the apple. Where is the apple?') ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?'] ''' return [x.strip() for x in re.split('(\W+)?', sent) if x.strip()] def parse_stories(lines, only_supporting=False): '''Parse stories provided in the bAbi tasks format If only_supporting is true, only the sentences that support the answer are kept. ''' data = [] story = [] for line in lines: line = line.decode('utf-8').strip() nid, line = line.split(' ', 1) nid = int(nid) if nid == 1: story = [] if '\t' in line: q, a, supporting = line.split('\t') q = tokenize(q) substory = None if only_supporting: # Only select the related substory supporting = map(int, supporting.split()) substory = [story[i - 1] for i in supporting] else: # Provide all the substories substory = [x for x in story if x] data.append((substory, q, a)) story.append('') else: sent = tokenize(line) story.append(sent) return data def get_stories(f, only_supporting=False, max_length=None): '''Given a file name, read the file, retrieve the stories, and then convert the sentences into a single story. If max_length is supplied, any stories longer than max_length tokens will be discarded. 
''' data = parse_stories(f.readlines(), only_supporting=only_supporting) flatten = lambda data: reduce(lambda x, y: x + y, data) data = [(flatten(story), q, answer) for story, q, answer in data if not max_length or len(flatten(story)) < max_length] return data def vectorize_stories(data, word_idx, story_maxlen, query_maxlen): xs = [] xqs = [] ys = [] for story, query, answer in data: x = [word_idx[w] for w in story] xq = [word_idx[w] for w in query] # let's not forget that index 0 is reserved y = np.zeros(len(word_idx) + 1) y[word_idx[answer]] = 1 xs.append(x) xqs.append(xq) ys.append(y) return (pad_sequences(xs, maxlen=story_maxlen), pad_sequences(xqs, maxlen=query_maxlen), np.array(ys)) RNN = recurrent.LSTM EMBED_HIDDEN_SIZE = 50 SENT_HIDDEN_SIZE = 100 QUERY_HIDDEN_SIZE = 100 BATCH_SIZE = 32 EPOCHS = 40 print('RNN / Embed / Sent / Query = {}, {}, {}, {}'.format(RNN, EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, QUERY_HIDDEN_SIZE)) try: path = get_file('babi-tasks-v1-2.tar.gz', origin='https://s3.amazonaws.com/text-datasets/' 'babi_tasks_1-20_v1-2.tar.gz') except: print('Error downloading dataset, please download it manually:\n' '$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2' '.tar.gz\n' '$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz') raise # Default QA1 with 1000 samples # challenge = 'tasks_1-20_v1-2/en/qa1_single-supporting-fact_{}.txt' # QA1 with 10,000 samples # challenge = 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt' # QA2 with 1000 samples challenge = 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt' # QA2 with 10,000 samples # challenge = 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt' with tarfile.open(path) as tar: train = get_stories(tar.extractfile(challenge.format('train'))) test = get_stories(tar.extractfile(challenge.format('test'))) vocab = set() for story, q, answer in train + test: vocab |= set(story + q + [answer]) vocab = sorted(vocab) # Reserve 0 for masking via pad_sequences 
vocab_size = len(vocab) + 1 word_idx = dict((c, i + 1) for i, c in enumerate(vocab)) story_maxlen = max(map(len, (x for x, _, _ in train + test))) query_maxlen = max(map(len, (x for _, x, _ in train + test))) x, xq, y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen) tx, txq, ty = vectorize_stories(test, word_idx, story_maxlen, query_maxlen) print('vocab = {}'.format(vocab)) print('x.shape = {}'.format(x.shape)) print('xq.shape = {}'.format(xq.shape)) print('y.shape = {}'.format(y.shape)) print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen, query_maxlen)) print('Build model...') sentence = layers.Input(shape=(story_maxlen,), dtype='int32') encoded_sentence = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence) encoded_sentence = layers.Dropout(0.3)(encoded_sentence) question = layers.Input(shape=(query_maxlen,), dtype='int32') encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question) encoded_question = layers.Dropout(0.3)(encoded_question) encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question) encoded_question = layers.RepeatVector(story_maxlen)(encoded_question) merged = layers.add([encoded_sentence, encoded_question]) merged = RNN(EMBED_HIDDEN_SIZE)(merged) merged = layers.Dropout(0.3)(merged) preds = layers.Dense(vocab_size, activation='softmax')(merged) model = Model([sentence, question], preds) model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) print('Training') model.fit([x, xq], y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.05) loss, acc = model.evaluate([tx, txq], ty, batch_size=BATCH_SIZE) print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc)) Keras-2.2.4/examples/README.md0000644000000000116100000001207113326715636015433 0ustar rooteng00000000000000# Keras examples directory ## Vision models examples [mnist_mlp.py](mnist_mlp.py) Trains a simple deep multi-layer perceptron on the MNIST dataset. 
[mnist_cnn.py](mnist_cnn.py) Trains a simple convnet on the MNIST dataset. [cifar10_cnn.py](cifar10_cnn.py) Trains a simple deep CNN on the CIFAR10 small images dataset. [cifar10_cnn_capsule.py](cifar10_cnn_capsule.py) Trains a simple CNN-Capsule Network on the CIFAR10 small images dataset. [cifar10_resnet.py](cifar10_resnet.py) Trains a ResNet on the CIFAR10 small images dataset. [conv_lstm.py](conv_lstm.py) Demonstrates the use of a convolutional LSTM network. [image_ocr.py](image_ocr.py) Trains a convolutional stack followed by a recurrent stack and a CTC logloss function to perform optical character recognition (OCR). [mnist_acgan.py](mnist_acgan.py) Implementation of AC-GAN (Auxiliary Classifier GAN) on the MNIST dataset [mnist_hierarchical_rnn.py](mnist_hierarchical_rnn.py) Trains a Hierarchical RNN (HRNN) to classify MNIST digits. [mnist_siamese.py](mnist_siamese.py) Trains a Siamese multi-layer perceptron on pairs of digits from the MNIST dataset. [mnist_swwae.py](mnist_swwae.py) Trains a Stacked What-Where AutoEncoder built on residual blocks on the MNIST dataset. [mnist_transfer_cnn.py](mnist_transfer_cnn.py) Transfer learning toy example on the MNIST dataset. [mnist_denoising_autoencoder.py](mnist_denoising_autoencoder.py) Trains a denoising autoencoder on the MNIST dataset. ---- ## Text & sequences examples [addition_rnn.py](addition_rnn.py) Implementation of sequence to sequence learning for performing addition of two numbers (as strings). [babi_rnn.py](babi_rnn.py) Trains a two-branch recurrent network on the bAbI dataset for reading comprehension. [babi_memnn.py](babi_memnn.py) Trains a memory network on the bAbI dataset for reading comprehension. [imdb_bidirectional_lstm.py](imdb_bidirectional_lstm.py) Trains a Bidirectional LSTM on the IMDB sentiment classification task. [imdb_cnn.py](imdb_cnn.py) Demonstrates the use of Convolution1D for text classification. 
[imdb_cnn_lstm.py](imdb_cnn_lstm.py) Trains a convolutional stack followed by a recurrent stack network on the IMDB sentiment classification task. [imdb_fasttext.py](imdb_fasttext.py) Trains a FastText model on the IMDB sentiment classification task. [imdb_lstm.py](imdb_lstm.py) Trains an LSTM model on the IMDB sentiment classification task. [lstm_stateful.py](lstm_stateful.py) Demonstrates how to use stateful RNNs to model long sequences efficiently. [lstm_seq2seq.py](lstm_seq2seq.py) Trains a basic character-level sequence-to-sequence model. [lstm_seq2seq_restore.py](lstm_seq2seq_restore.py) Restores a character-level sequence-to-sequence model from disk (saved by [lstm_seq2seq.py](lstm_seq2seq.py)) and uses it to generate predictions. [pretrained_word_embeddings.py](pretrained_word_embeddings.py) Loads pre-trained word embeddings (GloVe embeddings) into a frozen Keras Embedding layer, and uses it to train a text classification model on the 20 Newsgroup dataset. [reuters_mlp.py](reuters_mlp.py) Trains and evaluates a simple MLP on the Reuters newswire topic classification task. ---- ## Generative models examples [lstm_text_generation.py](lstm_text_generation.py) Generates text from Nietzsche's writings. [conv_filter_visualization.py](conv_filter_visualization.py) Visualization of the filters of VGG16, via gradient ascent in input space. [deep_dream.py](deep_dream.py) Deep Dreams in Keras. [neural_doodle.py](neural_doodle.py) Neural doodle. [neural_style_transfer.py](neural_style_transfer.py) Neural style transfer. [variational_autoencoder.py](variational_autoencoder.py) Demonstrates how to build a variational autoencoder. [variational_autoencoder_deconv.py](variational_autoencoder_deconv.py) Demonstrates how to build a variational autoencoder with Keras using deconvolution layers. ---- ## Examples demonstrating specific Keras functionality [antirectifier.py](antirectifier.py) Demonstrates how to write custom layers for Keras. 
[mnist_sklearn_wrapper.py](mnist_sklearn_wrapper.py) Demonstrates how to use the sklearn wrapper. [mnist_irnn.py](mnist_irnn.py) Reproduction of the IRNN experiment with pixel-by-pixel sequential MNIST in "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" by Le et al. [mnist_net2net.py](mnist_net2net.py) Reproduction of the Net2Net experiment with MNIST in "Net2Net: Accelerating Learning via Knowledge Transfer". [reuters_mlp_relu_vs_selu.py](reuters_mlp_relu_vs_selu.py) Compares self-normalizing MLPs with regular MLPs. [mnist_tfrecord.py](mnist_tfrecord.py) MNIST dataset with TFRecords, the standard TensorFlow data format. [mnist_dataset_api.py](mnist_dataset_api.py) MNIST dataset with TensorFlow's Dataset API. [cifar10_cnn_tfaugment2d.py](cifar10_cnn_tfaugment2d.py) Trains a simple deep CNN on the CIFAR10 small images dataset using Tensorflow internal augmentation APIs. [tensorboard_embeddings_mnist.py](tensorboard_embeddings_mnist.py) Trains a simple convnet on the MNIST dataset and embeds test data which can be later visualized using TensorBoard's Embedding Projector.Keras-2.2.4/examples/antirectifier.py0000644000000000116100000000644013247612467017361 0ustar rooteng00000000000000'''The example demonstrates how to write custom layers for Keras. We build a custom activation layer called 'Antirectifier', which modifies the shape of the tensor that passes through it. We need to specify two methods: `compute_output_shape` and `call`. Note that the same result can also be achieved via a Lambda layer. Because our custom layer is written with primitives from the Keras backend (`K`), our code can run both on TensorFlow and Theano. 
''' from __future__ import print_function import keras from keras.models import Sequential from keras import layers from keras.datasets import mnist from keras import backend as K class Antirectifier(layers.Layer): '''This is the combination of a sample-wise L2 normalization with the concatenation of the positive part of the input with the negative part of the input. The result is a tensor of samples that are twice as large as the input samples. It can be used in place of a ReLU. # Input shape 2D tensor of shape (samples, n) # Output shape 2D tensor of shape (samples, 2*n) # Theoretical justification When applying ReLU, assuming that the distribution of the previous output is approximately centered around 0., you are discarding half of your input. This is inefficient. Antirectifier allows to return all-positive outputs like ReLU, without discarding any data. Tests on MNIST show that Antirectifier allows to train networks with twice less parameters yet with comparable classification accuracy as an equivalent ReLU-based network. 
''' def compute_output_shape(self, input_shape): shape = list(input_shape) assert len(shape) == 2 # only valid for 2D tensors shape[-1] *= 2 return tuple(shape) def call(self, inputs): inputs -= K.mean(inputs, axis=1, keepdims=True) inputs = K.l2_normalize(inputs, axis=1) pos = K.relu(inputs) neg = K.relu(-inputs) return K.concatenate([pos, neg], axis=1) # global parameters batch_size = 128 num_classes = 10 epochs = 40 # the data, split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = x_train.reshape(60000, 784) x_test = x_test.reshape(10000, 784) x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) # build the model model = Sequential() model.add(layers.Dense(256, input_shape=(784,))) model.add(Antirectifier()) model.add(layers.Dropout(0.1)) model.add(layers.Dense(256)) model.add(Antirectifier()) model.add(layers.Dropout(0.1)) model.add(layers.Dense(num_classes)) model.add(layers.Activation('softmax')) # compile the model model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) # train the model model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) # next, compare with an equivalent network # with2x bigger Dense layers and ReLU Keras-2.2.4/examples/pretrained_word_embeddings.py0000644000000000116100000001155013354530144022066 0ustar rooteng00000000000000'''This script loads pre-trained word embeddings (GloVe embeddings) into a frozen Keras Embedding layer, and uses it to train a text classification model on the 20 Newsgroup dataset (classification of newsgroup messages into 20 different categories). 
GloVe embedding data can be found at: http://nlp.stanford.edu/data/glove.6B.zip (source page: http://nlp.stanford.edu/projects/glove/) 20 Newsgroup data can be found at: http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html ''' from __future__ import print_function import os import sys import numpy as np from keras.preprocessing.text import Tokenizer from keras.preprocessing.sequence import pad_sequences from keras.utils import to_categorical from keras.layers import Dense, Input, GlobalMaxPooling1D from keras.layers import Conv1D, MaxPooling1D, Embedding from keras.models import Model from keras.initializers import Constant BASE_DIR = '' GLOVE_DIR = os.path.join(BASE_DIR, 'glove.6B') TEXT_DATA_DIR = os.path.join(BASE_DIR, '20_newsgroup') MAX_SEQUENCE_LENGTH = 1000 MAX_NUM_WORDS = 20000 EMBEDDING_DIM = 100 VALIDATION_SPLIT = 0.2 # first, build index mapping words in the embeddings set # to their embedding vector print('Indexing word vectors.') embeddings_index = {} with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt')) as f: for line in f: values = line.split() word = values[0] coefs = np.asarray(values[1:], dtype='float32') embeddings_index[word] = coefs print('Found %s word vectors.' % len(embeddings_index)) # second, prepare text samples and their labels print('Processing text dataset') texts = [] # list of text samples labels_index = {} # dictionary mapping label name to numeric id labels = [] # list of label ids for name in sorted(os.listdir(TEXT_DATA_DIR)): path = os.path.join(TEXT_DATA_DIR, name) if os.path.isdir(path): label_id = len(labels_index) labels_index[name] = label_id for fname in sorted(os.listdir(path)): if fname.isdigit(): fpath = os.path.join(path, fname) args = {} if sys.version_info < (3,) else {'encoding': 'latin-1'} with open(fpath, **args) as f: t = f.read() i = t.find('\n\n') # skip header if 0 < i: t = t[i:] texts.append(t) labels.append(label_id) print('Found %s texts.' 
% len(texts)) # finally, vectorize the text samples into a 2D integer tensor tokenizer = Tokenizer(num_words=MAX_NUM_WORDS) tokenizer.fit_on_texts(texts) sequences = tokenizer.texts_to_sequences(texts) word_index = tokenizer.word_index print('Found %s unique tokens.' % len(word_index)) data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH) labels = to_categorical(np.asarray(labels)) print('Shape of data tensor:', data.shape) print('Shape of label tensor:', labels.shape) # split the data into a training set and a validation set indices = np.arange(data.shape[0]) np.random.shuffle(indices) data = data[indices] labels = labels[indices] num_validation_samples = int(VALIDATION_SPLIT * data.shape[0]) x_train = data[:-num_validation_samples] y_train = labels[:-num_validation_samples] x_val = data[-num_validation_samples:] y_val = labels[-num_validation_samples:] print('Preparing embedding matrix.') # prepare embedding matrix num_words = min(MAX_NUM_WORDS, len(word_index)) + 1 embedding_matrix = np.zeros((num_words, EMBEDDING_DIM)) for word, i in word_index.items(): if i > MAX_NUM_WORDS: continue embedding_vector = embeddings_index.get(word) if embedding_vector is not None: # words not found in embedding index will be all-zeros. 
embedding_matrix[i] = embedding_vector # load pre-trained word embeddings into an Embedding layer # note that we set trainable = False so as to keep the embeddings fixed embedding_layer = Embedding(num_words, EMBEDDING_DIM, embeddings_initializer=Constant(embedding_matrix), input_length=MAX_SEQUENCE_LENGTH, trainable=False) print('Training model.') # train a 1D convnet with global maxpooling sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32') embedded_sequences = embedding_layer(sequence_input) x = Conv1D(128, 5, activation='relu')(embedded_sequences) x = MaxPooling1D(5)(x) x = Conv1D(128, 5, activation='relu')(x) x = MaxPooling1D(5)(x) x = Conv1D(128, 5, activation='relu')(x) x = GlobalMaxPooling1D()(x) x = Dense(128, activation='relu')(x) preds = Dense(len(labels_index), activation='softmax')(x) model = Model(sequence_input, preds) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc']) model.fit(x_train, y_train, batch_size=128, epochs=10, validation_data=(x_val, y_val)) Keras-2.2.4/examples/lstm_seq2seq.py0000644000000000116100000002163313312516314017137 0ustar rooteng00000000000000'''Sequence to sequence example in Keras (character-level). This script demonstrates how to implement a basic character-level sequence-to-sequence model. We apply it to translating short English sentences into short French sentences, character-by-character. Note that it is fairly unusual to do character-level machine translation, as word-level models are more common in this domain. # Summary of the algorithm - We start with input sequences from a domain (e.g. English sentences) and corresponding target sequences from another domain (e.g. French sentences). - An encoder LSTM turns input sequences to 2 state vectors (we keep the last LSTM state and discard the outputs). 
- A decoder LSTM is trained to turn the target sequences into the same sequence but offset by one timestep in the future, a training process called "teacher forcing" in this context. Is uses as initial state the state vectors from the encoder. Effectively, the decoder learns to generate `targets[t+1...]` given `targets[...t]`, conditioned on the input sequence. - In inference mode, when we want to decode unknown input sequences, we: - Encode the input sequence into state vectors - Start with a target sequence of size 1 (just the start-of-sequence character) - Feed the state vectors and 1-char target sequence to the decoder to produce predictions for the next character - Sample the next character using these predictions (we simply use argmax). - Append the sampled character to the target sequence - Repeat until we generate the end-of-sequence character or we hit the character limit. # Data download English to French sentence pairs. http://www.manythings.org/anki/fra-eng.zip Lots of neat sentence pairs datasets can be found at: http://www.manythings.org/anki/ # References - Sequence to Sequence Learning with Neural Networks https://arxiv.org/abs/1409.3215 - Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation https://arxiv.org/abs/1406.1078 ''' from __future__ import print_function from keras.models import Model from keras.layers import Input, LSTM, Dense import numpy as np batch_size = 64 # Batch size for training. epochs = 100 # Number of epochs to train for. latent_dim = 256 # Latent dimensionality of the encoding space. num_samples = 10000 # Number of samples to train on. # Path to the data txt file on disk. data_path = 'fra-eng/fra.txt' # Vectorize the data. 
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()
with open(data_path, 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')
# `len(lines) - 1` drops the final element produced by `split('\n')`.
for line in lines[: min(num_samples, len(lines) - 1)]:
    # Keep only the first two tab-separated fields so that rows carrying
    # extra columns do not break the two-value unpacking.
    input_text, target_text = line.split('\t')[:2]
    # We use "tab" as the "start sequence" character
    # for the targets, and "\n" as "end sequence" character.
    target_text = '\t' + target_text + '\n'
    input_texts.append(input_text)
    target_texts.append(target_text)
    # Collect the character vocabulary of each side.
    input_characters.update(input_text)
    target_characters.update(target_text)

input_characters = sorted(input_characters)
target_characters = sorted(target_characters)
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max(len(txt) for txt in input_texts)
max_decoder_seq_length = max(len(txt) for txt in target_texts)

print('Number of samples:', len(input_texts))
print('Number of unique input tokens:', num_encoder_tokens)
print('Number of unique output tokens:', num_decoder_tokens)
print('Max sequence length for inputs:', max_encoder_seq_length)
print('Max sequence length for outputs:', max_decoder_seq_length)

# Character -> integer index lookups (indices follow the sorted order).
input_token_index = {char: i for i, char in enumerate(input_characters)}
target_token_index = {char: i for i, char in enumerate(target_characters)}

# One-hot tensors indexed as (sample, timestep, token);
# positions past the end of a text stay all-zero.
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
    dtype='float32')
decoder_input_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')
decoder_target_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.
    for t, char in enumerate(target_text):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        decoder_input_data[i, t, target_token_index[char]] = 1.
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.

# Define an input sequence and process it.
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None, num_decoder_tokens))
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Run training
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2)
# Save model
model.save('s2s.h5')

# Next: inference mode (sampling).
# Here's the drill:
# 1) encode input and retrieve initial decoder state
# 2) run one step of decoder with this initial state
# and a "start of sequence" token as target.
# Output will be the next target token # 3) Repeat with the current target token and current states # Define sampling models encoder_model = Model(encoder_inputs, encoder_states) decoder_state_input_h = Input(shape=(latent_dim,)) decoder_state_input_c = Input(shape=(latent_dim,)) decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c] decoder_outputs, state_h, state_c = decoder_lstm( decoder_inputs, initial_state=decoder_states_inputs) decoder_states = [state_h, state_c] decoder_outputs = decoder_dense(decoder_outputs) decoder_model = Model( [decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states) # Reverse-lookup token index to decode sequences back to # something readable. reverse_input_char_index = dict( (i, char) for char, i in input_token_index.items()) reverse_target_char_index = dict( (i, char) for char, i in target_token_index.items()) def decode_sequence(input_seq): # Encode the input as state vectors. states_value = encoder_model.predict(input_seq) # Generate empty target sequence of length 1. target_seq = np.zeros((1, 1, num_decoder_tokens)) # Populate the first character of target sequence with the start character. target_seq[0, 0, target_token_index['\t']] = 1. # Sampling loop for a batch of sequences # (to simplify, here we assume a batch of size 1). stop_condition = False decoded_sentence = '' while not stop_condition: output_tokens, h, c = decoder_model.predict( [target_seq] + states_value) # Sample a token sampled_token_index = np.argmax(output_tokens[0, -1, :]) sampled_char = reverse_target_char_index[sampled_token_index] decoded_sentence += sampled_char # Exit condition: either hit max length # or find stop character. if (sampled_char == '\n' or len(decoded_sentence) > max_decoder_seq_length): stop_condition = True # Update the target sequence (of length 1). target_seq = np.zeros((1, 1, num_decoder_tokens)) target_seq[0, 0, sampled_token_index] = 1. 
# Update states states_value = [h, c] return decoded_sentence for seq_index in range(100): # Take one sequence (part of the training set) # for trying out decoding. input_seq = encoder_input_data[seq_index: seq_index + 1] decoded_sentence = decode_sequence(input_seq) print('-') print('Input sentence:', input_texts[seq_index]) print('Decoded sentence:', decoded_sentence) Keras-2.2.4/examples/mnist_denoising_autoencoder.py0000644000000000116100000001144413240665765022314 0ustar rooteng00000000000000'''Trains a denoising autoencoder on MNIST dataset. Denoising is one of the classic applications of autoencoders. The denoising process removes unwanted noise that corrupted the true signal. Noise + Data ---> Denoising Autoencoder ---> Data Given a training dataset of corrupted data as input and true signal as output, a denoising autoencoder can recover the hidden structure to generate clean data. This example has modular design. The encoder, decoder and autoencoder are 3 models that share weights. For example, after training the autoencoder, the encoder can be used to generate latent vectors of input data for low-dim visualization like PCA or TSNE. 
''' from __future__ import absolute_import from __future__ import division from __future__ import print_function import keras from keras.layers import Activation, Dense, Input from keras.layers import Conv2D, Flatten from keras.layers import Reshape, Conv2DTranspose from keras.models import Model from keras import backend as K from keras.datasets import mnist import numpy as np import matplotlib.pyplot as plt from PIL import Image np.random.seed(1337) # MNIST dataset (x_train, _), (x_test, _) = mnist.load_data() image_size = x_train.shape[1] x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) x_test = np.reshape(x_test, [-1, image_size, image_size, 1]) x_train = x_train.astype('float32') / 255 x_test = x_test.astype('float32') / 255 # Generate corrupted MNIST images by adding noise with normal dist # centered at 0.5 and std=0.5 noise = np.random.normal(loc=0.5, scale=0.5, size=x_train.shape) x_train_noisy = x_train + noise noise = np.random.normal(loc=0.5, scale=0.5, size=x_test.shape) x_test_noisy = x_test + noise x_train_noisy = np.clip(x_train_noisy, 0., 1.) x_test_noisy = np.clip(x_test_noisy, 0., 1.) 
# Network parameters input_shape = (image_size, image_size, 1) batch_size = 128 kernel_size = 3 latent_dim = 16 # Encoder/Decoder number of CNN layers and filters per layer layer_filters = [32, 64] # Build the Autoencoder Model # First build the Encoder Model inputs = Input(shape=input_shape, name='encoder_input') x = inputs # Stack of Conv2D blocks # Notes: # 1) Use Batch Normalization before ReLU on deep networks # 2) Use MaxPooling2D as alternative to strides>1 # - faster but not as good as strides>1 for filters in layer_filters: x = Conv2D(filters=filters, kernel_size=kernel_size, strides=2, activation='relu', padding='same')(x) # Shape info needed to build Decoder Model shape = K.int_shape(x) # Generate the latent vector x = Flatten()(x) latent = Dense(latent_dim, name='latent_vector')(x) # Instantiate Encoder Model encoder = Model(inputs, latent, name='encoder') encoder.summary() # Build the Decoder Model latent_inputs = Input(shape=(latent_dim,), name='decoder_input') x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs) x = Reshape((shape[1], shape[2], shape[3]))(x) # Stack of Transposed Conv2D blocks # Notes: # 1) Use Batch Normalization before ReLU on deep networks # 2) Use UpSampling2D as alternative to strides>1 # - faster but not as good as strides>1 for filters in layer_filters[::-1]: x = Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=2, activation='relu', padding='same')(x) x = Conv2DTranspose(filters=1, kernel_size=kernel_size, padding='same')(x) outputs = Activation('sigmoid', name='decoder_output')(x) # Instantiate Decoder Model decoder = Model(latent_inputs, outputs, name='decoder') decoder.summary() # Autoencoder = Encoder + Decoder # Instantiate Autoencoder Model autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder') autoencoder.summary() autoencoder.compile(loss='mse', optimizer='adam') # Train the autoencoder autoencoder.fit(x_train_noisy, x_train, validation_data=(x_test_noisy, x_test), epochs=30, 
batch_size=batch_size) # Predict the Autoencoder output from corrupted test images x_decoded = autoencoder.predict(x_test_noisy) # Display the 1st 8 corrupted and denoised images rows, cols = 10, 30 num = rows * cols imgs = np.concatenate([x_test[:num], x_test_noisy[:num], x_decoded[:num]]) imgs = imgs.reshape((rows * 3, cols, image_size, image_size)) imgs = np.vstack(np.split(imgs, rows, axis=1)) imgs = imgs.reshape((rows * 3, -1, image_size, image_size)) imgs = np.vstack([np.hstack(i) for i in imgs]) imgs = (imgs * 255).astype(np.uint8) plt.figure() plt.axis('off') plt.title('Original images: top rows, ' 'Corrupted Input: middle rows, ' 'Denoised Input: third rows') plt.imshow(imgs, interpolation='none', cmap='gray') Image.fromarray(imgs).save('corrupted_and_denoised.png') plt.show() Keras-2.2.4/examples/deep_dream.py0000644000000000116100000001404513342055016016601 0ustar rooteng00000000000000'''Deep Dreaming in Keras. Run the script with: ``` python deep_dream.py path_to_your_base_image.jpg prefix_for_results ``` e.g.: ``` python deep_dream.py img/mypic.jpg results/dream ``` ''' from __future__ import print_function from keras.preprocessing.image import load_img, save_img, img_to_array import numpy as np import scipy import argparse from keras.applications import inception_v3 from keras import backend as K parser = argparse.ArgumentParser(description='Deep Dreams with Keras.') parser.add_argument('base_image_path', metavar='base', type=str, help='Path to the image to transform.') parser.add_argument('result_prefix', metavar='res_prefix', type=str, help='Prefix for the saved results.') args = parser.parse_args() base_image_path = args.base_image_path result_prefix = args.result_prefix # These are the names of the layers # for which we try to maximize activation, # as well as their weight in the final loss # we try to maximize. # You can tweak these setting to obtain new visual effects. 
settings = {
    'features': {
        'mixed2': 0.2,
        'mixed3': 0.5,
        'mixed4': 2.,
        'mixed5': 1.5,
    },
}


def preprocess_image(image_path):
    """Loads an image file and formats it as an InceptionV3 input tensor.

    # Arguments
        image_path: Path of the image file to load.

    # Returns
        The image as an array with a leading axis of size 1 added,
        passed through `inception_v3.preprocess_input`.
    """
    img = load_img(image_path)
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = inception_v3.preprocess_input(img)
    return img


def deprocess_image(x):
    """Converts a single-image tensor into a displayable uint8 image.

    The arithmetic maps the value range [-1, 1] onto [0, 255]; anything
    outside that range is clipped.

    Note: the scaling is applied with in-place operators, so the caller's
    array may be modified. Pass a copy if the input must be preserved.

    # Arguments
        x: 4D array holding one 3-channel image, laid out according to
            `K.image_data_format()`.

    # Returns
        A uint8 array with the channel axis last.
    """
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, x.shape[2], x.shape[3]))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((x.shape[1], x.shape[2], 3))
    x /= 2.
    x += 0.5
    x *= 255.
    x = np.clip(x, 0, 255).astype('uint8')
    return x


K.set_learning_phase(0)

# Build the InceptionV3 network with our placeholder.
# The model will be loaded with pre-trained ImageNet weights.
model = inception_v3.InceptionV3(weights='imagenet',
                                 include_top=False)
dream = model.input
print('Model loaded.')

# Get the symbolic outputs of each "key" layer (we gave them unique names).
layer_dict = {layer.name: layer for layer in model.layers}

# Define the loss.
loss = K.variable(0.)
for layer_name, coeff in settings['features'].items():
    # Fail early with a clear message on a misspelt layer name.
    # (An `assert` on a parenthesized `(condition, message)` tuple is
    # always true, so it cannot be used for this check.)
    if layer_name not in layer_dict:
        raise ValueError('Layer ' + layer_name + ' not found in model.')
    # Add the scaled sum of squared activations of the layer to the loss.
    x = layer_dict[layer_name].output
    # We avoid border artifacts by only involving non-border pixels in the loss.
    scaling = K.prod(K.cast(K.shape(x), 'float32'))
    if K.image_data_format() == 'channels_first':
        loss += coeff * K.sum(K.square(x[:, :, 2: -2, 2: -2])) / scaling
    else:
        loss += coeff * K.sum(K.square(x[:, 2: -2, 2: -2, :])) / scaling

# Compute the gradients of the dream wrt the loss.
grads = K.gradients(loss, dream)[0]
# Normalize gradients.
grads /= K.maximum(K.mean(K.abs(grads)), K.epsilon())

# Set up function to retrieve the value
# of the loss and gradients given an input image.
outputs = [loss, grads]
fetch_loss_and_grads = K.function([dream], outputs)


def eval_loss_and_grads(x):
    """Evaluates the loss and its gradients for one input image.

    # Arguments
        x: Image array fed to `fetch_loss_and_grads`.

    # Returns
        Tuple `(loss_value, grad_values)`.
    """
    outs = fetch_loss_and_grads([x])
    loss_value = outs[0]
    grad_values = outs[1]
    return loss_value, grad_values


def resize_img(img, size):
    """Resizes the two spatial axes of a 4D image array.

    # Arguments
        img: 4D image array laid out according to `K.image_data_format()`.
        size: Pair giving the target extents of the two spatial axes,
            in the same order as they appear in `img`.

    # Returns
        A new, resized array; `img` itself is left untouched.
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.ndimage` is available as an attribute.
    from scipy import ndimage

    img = np.copy(img)
    if K.image_data_format() == 'channels_first':
        factors = (1, 1,
                   float(size[0]) / img.shape[2],
                   float(size[1]) / img.shape[3])
    else:
        factors = (1,
                   float(size[0]) / img.shape[1],
                   float(size[1]) / img.shape[2],
                   1)
    # Zoom factors of 1 leave the batch and channel axes unchanged.
    return ndimage.zoom(img, factors, order=1)


def gradient_ascent(x, iterations, step, max_loss=None):
    """Runs gradient ascent on an image to increase the loss.

    Note: `x` is updated with an in-place `+=`, so the array passed in
    is modified as well as returned.

    # Arguments
        x: Image array to optimize.
        iterations: Maximum number of ascent steps.
        step: Step size applied to the gradients.
        max_loss: Optional threshold; the loop stops, without applying
            the pending update, once the loss exceeds it.

    # Returns
        The updated image array.
    """
    for i in range(iterations):
        loss_value, grad_values = eval_loss_and_grads(x)
        if max_loss is not None and loss_value > max_loss:
            break
        print('..Loss value at', i, ':', loss_value)
        x += step * grad_values
    return x


"""Process:

- Load the original image.
- Define a number of processing scales (i.e. image shapes),
    from smallest to largest.
- Resize the original image to the smallest scale.
- For every scale, starting with the smallest (i.e. current one):
    - Run gradient ascent
    - Upscale image to the next scale
    - Reinject the detail that was lost at upscaling time
- Stop when we are back to the original size.

To obtain the detail lost during upscaling, we simply
take the original image, shrink it down, upscale it,
and compare the result to the (resized) original image.
"""


# Playing with these hyperparameters will also allow you to achieve new effects
step = 0.01  # Gradient ascent step size
num_octave = 3  # Number of scales at which to run gradient ascent
octave_scale = 1.4  # Size ratio between scales
iterations = 20  # Number of ascent steps per scale
max_loss = 10.
img = preprocess_image(base_image_path) if K.image_data_format() == 'channels_first': original_shape = img.shape[2:] else: original_shape = img.shape[1:3] successive_shapes = [original_shape] for i in range(1, num_octave): shape = tuple([int(dim / (octave_scale ** i)) for dim in original_shape]) successive_shapes.append(shape) successive_shapes = successive_shapes[::-1] original_img = np.copy(img) shrunk_original_img = resize_img(img, successive_shapes[0]) for shape in successive_shapes: print('Processing image shape', shape) img = resize_img(img, shape) img = gradient_ascent(img, iterations=iterations, step=step, max_loss=max_loss) upscaled_shrunk_original_img = resize_img(shrunk_original_img, shape) same_size_original = resize_img(original_img, shape) lost_detail = same_size_original - upscaled_shrunk_original_img img += lost_detail shrunk_original_img = resize_img(original_img, shape) save_img(result_prefix + '.png', deprocess_image(np.copy(img))) Keras-2.2.4/examples/reuters_mlp.py0000644000000000116100000000364313146670577017100 0ustar rooteng00000000000000'''Trains and evaluate a simple MLP on the Reuters newswire topic classification task. 
''' from __future__ import print_function import numpy as np import keras from keras.datasets import reuters from keras.models import Sequential from keras.layers import Dense, Dropout, Activation from keras.preprocessing.text import Tokenizer max_words = 1000 batch_size = 32 epochs = 5 print('Loading data...') (x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words, test_split=0.2) print(len(x_train), 'train sequences') print(len(x_test), 'test sequences') num_classes = np.max(y_train) + 1 print(num_classes, 'classes') print('Vectorizing sequence data...') tokenizer = Tokenizer(num_words=max_words) x_train = tokenizer.sequences_to_matrix(x_train, mode='binary') x_test = tokenizer.sequences_to_matrix(x_test, mode='binary') print('x_train shape:', x_train.shape) print('x_test shape:', x_test.shape) print('Convert class vector to binary class matrix ' '(for use with categorical_crossentropy)') y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) print('y_train shape:', y_train.shape) print('y_test shape:', y_test.shape) print('Building model...') model = Sequential() model.add(Dense(512, input_shape=(max_words,))) model.add(Activation('relu')) model.add(Dropout(0.5)) model.add(Dense(num_classes)) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0.1) score = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=1) print('Test score:', score[0]) print('Test accuracy:', score[1]) Keras-2.2.4/examples/addition_rnn.py0000644000000000116100000001560613326715636017205 0ustar rooteng00000000000000# -*- coding: utf-8 -*- '''An implementation of sequence to sequence learning for performing addition Input: "535+61" Output: "596" Padding is handled by using a repeated sentinel character (space) Input may optionally 
be reversed, shown to increase performance in many tasks in: "Learning to Execute" http://arxiv.org/abs/1410.4615 and "Sequence to Sequence Learning with Neural Networks" http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf Theoretically it introduces shorter term dependencies between source and target. Two digits reversed: + One layer LSTM (128 HN), 5k training examples = 99% train/test accuracy in 55 epochs Three digits reversed: + One layer LSTM (128 HN), 50k training examples = 99% train/test accuracy in 100 epochs Four digits reversed: + One layer LSTM (128 HN), 400k training examples = 99% train/test accuracy in 20 epochs Five digits reversed: + One layer LSTM (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs ''' # noqa from __future__ import print_function from keras.models import Sequential from keras import layers import numpy as np from six.moves import range class CharacterTable(object): """Given a set of characters: + Encode them to a one hot integer representation + Decode the one hot integer representation to their character output + Decode a vector of probabilities to their character output """ def __init__(self, chars): """Initialize character table. # Arguments chars: Characters that can appear in the input. """ self.chars = sorted(set(chars)) self.char_indices = dict((c, i) for i, c in enumerate(self.chars)) self.indices_char = dict((i, c) for i, c in enumerate(self.chars)) def encode(self, C, num_rows): """One hot encode given string C. # Arguments num_rows: Number of rows in the returned one hot encoding. This is used to keep the # of rows for each data the same. 
""" x = np.zeros((num_rows, len(self.chars))) for i, c in enumerate(C): x[i, self.char_indices[c]] = 1 return x def decode(self, x, calc_argmax=True): if calc_argmax: x = x.argmax(axis=-1) return ''.join(self.indices_char[x] for x in x) class colors: ok = '\033[92m' fail = '\033[91m' close = '\033[0m' # Parameters for the model and dataset. TRAINING_SIZE = 50000 DIGITS = 3 REVERSE = True # Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of # int is DIGITS. MAXLEN = DIGITS + 1 + DIGITS # All the numbers, plus sign and space for padding. chars = '0123456789+ ' ctable = CharacterTable(chars) questions = [] expected = [] seen = set() print('Generating data...') while len(questions) < TRAINING_SIZE: f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1)))) a, b = f(), f() # Skip any addition questions we've already seen # Also skip any such that x+Y == Y+x (hence the sorting). key = tuple(sorted((a, b))) if key in seen: continue seen.add(key) # Pad the data with spaces such that it is always MAXLEN. q = '{}+{}'.format(a, b) query = q + ' ' * (MAXLEN - len(q)) ans = str(a + b) # Answers can be of maximum size DIGITS + 1. ans += ' ' * (DIGITS + 1 - len(ans)) if REVERSE: # Reverse the query, e.g., '12+345 ' becomes ' 543+21'. (Note the # space used for padding.) query = query[::-1] questions.append(query) expected.append(ans) print('Total addition questions:', len(questions)) print('Vectorization...') x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool) y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool) for i, sentence in enumerate(questions): x[i] = ctable.encode(sentence, MAXLEN) for i, sentence in enumerate(expected): y[i] = ctable.encode(sentence, DIGITS + 1) # Shuffle (x, y) in unison as the later parts of x will almost all be larger # digits. 
indices = np.arange(len(y)) np.random.shuffle(indices) x = x[indices] y = y[indices] # Explicitly set apart 10% for validation data that we never train over. split_at = len(x) - len(x) // 10 (x_train, x_val) = x[:split_at], x[split_at:] (y_train, y_val) = y[:split_at], y[split_at:] print('Training Data:') print(x_train.shape) print(y_train.shape) print('Validation Data:') print(x_val.shape) print(y_val.shape) # Try replacing GRU, or SimpleRNN. RNN = layers.LSTM HIDDEN_SIZE = 128 BATCH_SIZE = 128 LAYERS = 1 print('Build model...') model = Sequential() # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE. # Note: In a situation where your input sequences have a variable length, # use input_shape=(None, num_feature). model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars)))) # As the decoder RNN's input, repeatedly provide with the last hidden state of # RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum # length of output, e.g., when DIGITS=3, max output is 999+999=1998. model.add(layers.RepeatVector(DIGITS + 1)) # The decoder RNN could be multiple layers stacked or a single layer. for _ in range(LAYERS): # By setting return_sequences to True, return not only the last output but # all the outputs so far in the form of (num_samples, timesteps, # output_dim). This is necessary as TimeDistributed in the below expects # the first dimension to be the timesteps. model.add(RNN(HIDDEN_SIZE, return_sequences=True)) # Apply a dense layer to the every temporal slice of an input. For each of step # of the output sequence, decide which character should be chosen. model.add(layers.TimeDistributed(layers.Dense(len(chars)))) model.add(layers.Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) model.summary() # Train the model each generation and show predictions against the validation # dataset. 
for iteration in range(1, 200): print() print('-' * 50) print('Iteration', iteration) model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=1, validation_data=(x_val, y_val)) # Select 10 samples from the validation set at random so we can visualize # errors. for i in range(10): ind = np.random.randint(0, len(x_val)) rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])] preds = model.predict_classes(rowx, verbose=0) q = ctable.decode(rowx[0]) correct = ctable.decode(rowy[0]) guess = ctable.decode(preds[0], calc_argmax=False) print('Q', q[::-1] if REVERSE else q, end=' ') print('T', correct, end=' ') if correct == guess: print(colors.ok + '☑' + colors.close, end=' ') else: print(colors.fail + '☒' + colors.close, end=' ') print(guess) Keras-2.2.4/examples/babi_memnn.py0000644000000000116100000002046213326715636016620 0ustar rooteng00000000000000'''Trains a memory network on the bAbI dataset. References: - Jason Weston, Antoine Bordes, Sumit Chopra, Tomas Mikolov, Alexander M. Rush, "Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks", http://arxiv.org/abs/1502.05698 - Sainbayar Sukhbaatar, Arthur Szlam, Jason Weston, Rob Fergus, "End-To-End Memory Networks", http://arxiv.org/abs/1503.08895 Reaches 98.6% accuracy on task 'single_supporting_fact_10k' after 120 epochs. Time per epoch: 3s on CPU (core i7). ''' from __future__ import print_function from keras.models import Sequential, Model from keras.layers.embeddings import Embedding from keras.layers import Input, Activation, Dense, Permute, Dropout from keras.layers import add, dot, concatenate from keras.layers import LSTM from keras.utils.data_utils import get_file from keras.preprocessing.sequence import pad_sequences from functools import reduce import tarfile import numpy as np import re def tokenize(sent): '''Return the tokens of a sentence including punctuation. >>> tokenize('Bob dropped the apple. 
Where is the apple?') ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?'] ''' return [x.strip() for x in re.split('(\W+)?', sent) if x.strip()] def parse_stories(lines, only_supporting=False): '''Parse stories provided in the bAbi tasks format If only_supporting is true, only the sentences that support the answer are kept. ''' data = [] story = [] for line in lines: line = line.decode('utf-8').strip() nid, line = line.split(' ', 1) nid = int(nid) if nid == 1: story = [] if '\t' in line: q, a, supporting = line.split('\t') q = tokenize(q) substory = None if only_supporting: # Only select the related substory supporting = map(int, supporting.split()) substory = [story[i - 1] for i in supporting] else: # Provide all the substories substory = [x for x in story if x] data.append((substory, q, a)) story.append('') else: sent = tokenize(line) story.append(sent) return data def get_stories(f, only_supporting=False, max_length=None): '''Given a file name, read the file, retrieve the stories, and then convert the sentences into a single story. If max_length is supplied, any stories longer than max_length tokens will be discarded. 
''' data = parse_stories(f.readlines(), only_supporting=only_supporting) flatten = lambda data: reduce(lambda x, y: x + y, data) data = [(flatten(story), q, answer) for story, q, answer in data if not max_length or len(flatten(story)) < max_length] return data def vectorize_stories(data): inputs, queries, answers = [], [], [] for story, query, answer in data: inputs.append([word_idx[w] for w in story]) queries.append([word_idx[w] for w in query]) answers.append(word_idx[answer]) return (pad_sequences(inputs, maxlen=story_maxlen), pad_sequences(queries, maxlen=query_maxlen), np.array(answers)) try: path = get_file('babi-tasks-v1-2.tar.gz', origin='https://s3.amazonaws.com/text-datasets/' 'babi_tasks_1-20_v1-2.tar.gz') except: print('Error downloading dataset, please download it manually:\n' '$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2' '.tar.gz\n' '$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz') raise challenges = { # QA1 with 10,000 samples 'single_supporting_fact_10k': 'tasks_1-20_v1-2/en-10k/qa1_' 'single-supporting-fact_{}.txt', # QA2 with 10,000 samples 'two_supporting_facts_10k': 'tasks_1-20_v1-2/en-10k/qa2_' 'two-supporting-facts_{}.txt', } challenge_type = 'single_supporting_fact_10k' challenge = challenges[challenge_type] print('Extracting stories for the challenge:', challenge_type) with tarfile.open(path) as tar: train_stories = get_stories(tar.extractfile(challenge.format('train'))) test_stories = get_stories(tar.extractfile(challenge.format('test'))) vocab = set() for story, q, answer in train_stories + test_stories: vocab |= set(story + q + [answer]) vocab = sorted(vocab) # Reserve 0 for masking via pad_sequences vocab_size = len(vocab) + 1 story_maxlen = max(map(len, (x for x, _, _ in train_stories + test_stories))) query_maxlen = max(map(len, (x for _, x, _ in train_stories + test_stories))) print('-') print('Vocab size:', vocab_size, 'unique words') print('Story max length:', story_maxlen, 'words') 
print('Query max length:', query_maxlen, 'words') print('Number of training stories:', len(train_stories)) print('Number of test stories:', len(test_stories)) print('-') print('Here\'s what a "story" tuple looks like (input, query, answer):') print(train_stories[0]) print('-') print('Vectorizing the word sequences...') word_idx = dict((c, i + 1) for i, c in enumerate(vocab)) inputs_train, queries_train, answers_train = vectorize_stories(train_stories) inputs_test, queries_test, answers_test = vectorize_stories(test_stories) print('-') print('inputs: integer tensor of shape (samples, max_length)') print('inputs_train shape:', inputs_train.shape) print('inputs_test shape:', inputs_test.shape) print('-') print('queries: integer tensor of shape (samples, max_length)') print('queries_train shape:', queries_train.shape) print('queries_test shape:', queries_test.shape) print('-') print('answers: binary (1 or 0) tensor of shape (samples, vocab_size)') print('answers_train shape:', answers_train.shape) print('answers_test shape:', answers_test.shape) print('-') print('Compiling...') # placeholders input_sequence = Input((story_maxlen,)) question = Input((query_maxlen,)) # encoders # embed the input sequence into a sequence of vectors input_encoder_m = Sequential() input_encoder_m.add(Embedding(input_dim=vocab_size, output_dim=64)) input_encoder_m.add(Dropout(0.3)) # output: (samples, story_maxlen, embedding_dim) # embed the input into a sequence of vectors of size query_maxlen input_encoder_c = Sequential() input_encoder_c.add(Embedding(input_dim=vocab_size, output_dim=query_maxlen)) input_encoder_c.add(Dropout(0.3)) # output: (samples, story_maxlen, query_maxlen) # embed the question into a sequence of vectors question_encoder = Sequential() question_encoder.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=query_maxlen)) question_encoder.add(Dropout(0.3)) # output: (samples, query_maxlen, embedding_dim) # encode input sequence and questions (which are 
indices) # to sequences of dense vectors input_encoded_m = input_encoder_m(input_sequence) input_encoded_c = input_encoder_c(input_sequence) question_encoded = question_encoder(question) # compute a 'match' between the first input vector sequence # and the question vector sequence # shape: `(samples, story_maxlen, query_maxlen)` match = dot([input_encoded_m, question_encoded], axes=(2, 2)) match = Activation('softmax')(match) # add the match matrix with the second input vector sequence response = add([match, input_encoded_c]) # (samples, story_maxlen, query_maxlen) response = Permute((2, 1))(response) # (samples, query_maxlen, story_maxlen) # concatenate the match matrix with the question vector sequence answer = concatenate([response, question_encoded]) # the original paper uses a matrix multiplication for this reduction step. # we choose to use a RNN instead. answer = LSTM(32)(answer) # (samples, 32) # one regularization layer -- more would probably be needed. answer = Dropout(0.3)(answer) answer = Dense(vocab_size)(answer) # (samples, vocab_size) # we output a probability distribution over the vocabulary answer = Activation('softmax')(answer) # build the final model model = Model([input_sequence, question], answer) model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy']) # train model.fit([inputs_train, queries_train], answers_train, batch_size=32, epochs=120, validation_data=([inputs_test, queries_test], answers_test)) Keras-2.2.4/examples/image_ocr.py0000644000000000116100000005227213342055016016445 0ustar rooteng00000000000000# -*- coding: utf-8 -*- '''This example uses a convolutional stack followed by a recurrent stack and a CTC logloss function to perform optical character recognition of generated text images. I have no evidence of whether it actually learns general shapes of text, or just is able to recognize all the different fonts thrown at it...the purpose is more to demonstrate CTC inside of Keras. 
Note that the font list may need to be updated for the particular OS in use. This starts off with 4 letter words. For the first 12 epochs, the difficulty is gradually increased using the TextImageGenerator class which is both a generator class for test/train data and a Keras callback class. After 20 epochs, longer sequences are thrown at it by recompiling the model to handle a wider image and rebuilding the word list to include two words separated by a space. The table below shows normalized edit distance values. Theano uses a slightly different CTC implementation, hence the different results. Norm. ED Epoch | TF | TH ------------------------ 10 0.027 0.064 15 0.038 0.035 20 0.043 0.045 25 0.014 0.019 This requires cairo and editdistance packages: pip install cairocffi pip install editdistance Created by Mike Henry https://github.com/mbhenry/ ''' import os import itertools import codecs import re import datetime import cairocffi as cairo import editdistance import numpy as np from scipy import ndimage import pylab from keras import backend as K from keras.layers.convolutional import Conv2D, MaxPooling2D from keras.layers import Input, Dense, Activation from keras.layers import Reshape, Lambda from keras.layers.merge import add, concatenate from keras.models import Model from keras.layers.recurrent import GRU from keras.optimizers import SGD from keras.utils.data_utils import get_file from keras.preprocessing import image import keras.callbacks OUTPUT_DIR = 'image_ocr' # character classes and matching regex filter regex = r'^[a-z ]+$' alphabet = u'abcdefghijklmnopqrstuvwxyz ' np.random.seed(55) # this creates larger "blotches" of noise which look # more realistic than just adding gaussian noise # assumes greyscale with pixels ranging from 0 to 1 def speckle(img): severity = np.random.uniform(0, 0.6) blur = ndimage.gaussian_filter(np.random.randn(*img.shape) * severity, 1) img_speck = (img + blur) img_speck[img_speck > 1] = 1 img_speck[img_speck <= 0] = 0 return 
img_speck # paints the string in a random location the bounding box # also uses a random font, a slight random rotation, # and a random amount of speckle noise def paint_text(text, w, h, rotate=False, ud=False, multi_fonts=False): surface = cairo.ImageSurface(cairo.FORMAT_RGB24, w, h) with cairo.Context(surface) as context: context.set_source_rgb(1, 1, 1) # White context.paint() # this font list works in CentOS 7 if multi_fonts: fonts = [ 'Century Schoolbook', 'Courier', 'STIX', 'URW Chancery L', 'FreeMono'] context.select_font_face( np.random.choice(fonts), cairo.FONT_SLANT_NORMAL, np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL])) else: context.select_font_face('Courier', cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_BOLD) context.set_font_size(25) box = context.text_extents(text) border_w_h = (4, 4) if box[2] > (w - 2 * border_w_h[1]) or box[3] > (h - 2 * border_w_h[0]): raise IOError(('Could not fit string into image.' 'Max char count is too large for given image width.')) # teach the RNN translational invariance by # fitting text box randomly on canvas, with some room to rotate max_shift_x = w - box[2] - border_w_h[0] max_shift_y = h - box[3] - border_w_h[1] top_left_x = np.random.randint(0, int(max_shift_x)) if ud: top_left_y = np.random.randint(0, int(max_shift_y)) else: top_left_y = h // 2 context.move_to(top_left_x - int(box[0]), top_left_y - int(box[1])) context.set_source_rgb(0, 0, 0) context.show_text(text) buf = surface.get_data() a = np.frombuffer(buf, np.uint8) a.shape = (h, w, 4) a = a[:, :, 0] # grab single channel a = a.astype(np.float32) / 255 a = np.expand_dims(a, 0) if rotate: a = image.random_rotation(a, 3 * (w - top_left_x) / w + 1) a = speckle(a) return a def shuffle_mats_or_lists(matrix_list, stop_ind=None): ret = [] assert all([len(i) == len(matrix_list[0]) for i in matrix_list]) len_val = len(matrix_list[0]) if stop_ind is None: stop_ind = len_val assert stop_ind <= len_val a = list(range(stop_ind)) np.random.shuffle(a) a 
+= list(range(stop_ind, len_val)) for mat in matrix_list: if isinstance(mat, np.ndarray): ret.append(mat[a]) elif isinstance(mat, list): ret.append([mat[i] for i in a]) else: raise TypeError('`shuffle_mats_or_lists` only supports ' 'numpy.array and list objects.') return ret # Translation of characters to unique integer values def text_to_labels(text): ret = [] for char in text: ret.append(alphabet.find(char)) return ret # Reverse translation of numerical classes back to characters def labels_to_text(labels): ret = [] for c in labels: if c == len(alphabet): # CTC Blank ret.append("") else: ret.append(alphabet[c]) return "".join(ret) # only a-z and space..probably not to difficult # to expand to uppercase and symbols def is_valid_str(in_str): search = re.compile(regex, re.UNICODE).search return bool(search(in_str)) # Uses generator functions to supply train/test with # data. Image renderings are text are created on the fly # each time with random perturbations class TextImageGenerator(keras.callbacks.Callback): def __init__(self, monogram_file, bigram_file, minibatch_size, img_w, img_h, downsample_factor, val_split, absolute_max_string_len=16): self.minibatch_size = minibatch_size self.img_w = img_w self.img_h = img_h self.monogram_file = monogram_file self.bigram_file = bigram_file self.downsample_factor = downsample_factor self.val_split = val_split self.blank_label = self.get_output_size() - 1 self.absolute_max_string_len = absolute_max_string_len def get_output_size(self): return len(alphabet) + 1 # num_words can be independent of the epoch size due to the use of generators # as max_string_len grows, num_words can grow def build_word_list(self, num_words, max_string_len=None, mono_fraction=0.5): assert max_string_len <= self.absolute_max_string_len assert num_words % self.minibatch_size == 0 assert (self.val_split * num_words) % self.minibatch_size == 0 self.num_words = num_words self.string_list = [''] * self.num_words tmp_string_list = [] self.max_string_len = 
max_string_len self.Y_data = np.ones([self.num_words, self.absolute_max_string_len]) * -1 self.X_text = [] self.Y_len = [0] * self.num_words def _is_length_of_word_valid(word): return (max_string_len == -1 or max_string_len is None or len(word) <= max_string_len) # monogram file is sorted by frequency in english speech with codecs.open(self.monogram_file, mode='r', encoding='utf-8') as f: for line in f: if len(tmp_string_list) == int(self.num_words * mono_fraction): break word = line.rstrip() if _is_length_of_word_valid(word): tmp_string_list.append(word) # bigram file contains common word pairings in english speech with codecs.open(self.bigram_file, mode='r', encoding='utf-8') as f: lines = f.readlines() for line in lines: if len(tmp_string_list) == self.num_words: break columns = line.lower().split() word = columns[0] + ' ' + columns[1] if is_valid_str(word) and _is_length_of_word_valid(word): tmp_string_list.append(word) if len(tmp_string_list) != self.num_words: raise IOError('Could not pull enough words' 'from supplied monogram and bigram files.') # interlace to mix up the easy and hard words self.string_list[::2] = tmp_string_list[:self.num_words // 2] self.string_list[1::2] = tmp_string_list[self.num_words // 2:] for i, word in enumerate(self.string_list): self.Y_len[i] = len(word) self.Y_data[i, 0:len(word)] = text_to_labels(word) self.X_text.append(word) self.Y_len = np.expand_dims(np.array(self.Y_len), 1) self.cur_val_index = self.val_split self.cur_train_index = 0 # each time an image is requested from train/val/test, a new random # painting of the text is performed def get_batch(self, index, size, train): # width and height are backwards from typical Keras convention # because width is the time dimension when it gets fed into the RNN if K.image_data_format() == 'channels_first': X_data = np.ones([size, 1, self.img_w, self.img_h]) else: X_data = np.ones([size, self.img_w, self.img_h, 1]) labels = np.ones([size, self.absolute_max_string_len]) input_length 
= np.zeros([size, 1]) label_length = np.zeros([size, 1]) source_str = [] for i in range(size): # Mix in some blank inputs. This seems to be important for # achieving translational invariance if train and i > size - 4: if K.image_data_format() == 'channels_first': X_data[i, 0, 0:self.img_w, :] = self.paint_func('')[0, :, :].T else: X_data[i, 0:self.img_w, :, 0] = self.paint_func('',)[0, :, :].T labels[i, 0] = self.blank_label input_length[i] = self.img_w // self.downsample_factor - 2 label_length[i] = 1 source_str.append('') else: if K.image_data_format() == 'channels_first': X_data[i, 0, 0:self.img_w, :] = ( self.paint_func(self.X_text[index + i])[0, :, :].T) else: X_data[i, 0:self.img_w, :, 0] = ( self.paint_func(self.X_text[index + i])[0, :, :].T) labels[i, :] = self.Y_data[index + i] input_length[i] = self.img_w // self.downsample_factor - 2 label_length[i] = self.Y_len[index + i] source_str.append(self.X_text[index + i]) inputs = {'the_input': X_data, 'the_labels': labels, 'input_length': input_length, 'label_length': label_length, 'source_str': source_str # used for visualization only } outputs = {'ctc': np.zeros([size])} # dummy data for dummy loss function return (inputs, outputs) def next_train(self): while 1: ret = self.get_batch(self.cur_train_index, self.minibatch_size, train=True) self.cur_train_index += self.minibatch_size if self.cur_train_index >= self.val_split: self.cur_train_index = self.cur_train_index % 32 (self.X_text, self.Y_data, self.Y_len) = shuffle_mats_or_lists( [self.X_text, self.Y_data, self.Y_len], self.val_split) yield ret def next_val(self): while 1: ret = self.get_batch(self.cur_val_index, self.minibatch_size, train=False) self.cur_val_index += self.minibatch_size if self.cur_val_index >= self.num_words: self.cur_val_index = self.val_split + self.cur_val_index % 32 yield ret def on_train_begin(self, logs={}): self.build_word_list(16000, 4, 1) self.paint_func = lambda text: paint_text( text, self.img_w, self.img_h, rotate=False, 
ud=False, multi_fonts=False) def on_epoch_begin(self, epoch, logs={}): # rebind the paint function to implement curriculum learning if 3 <= epoch < 6: self.paint_func = lambda text: paint_text( text, self.img_w, self.img_h, rotate=False, ud=True, multi_fonts=False) elif 6 <= epoch < 9: self.paint_func = lambda text: paint_text( text, self.img_w, self.img_h, rotate=False, ud=True, multi_fonts=True) elif epoch >= 9: self.paint_func = lambda text: paint_text( text, self.img_w, self.img_h, rotate=True, ud=True, multi_fonts=True) if epoch >= 21 and self.max_string_len < 12: self.build_word_list(32000, 12, 0.5) # the actual loss calc occurs here despite it not being # an internal Keras loss function def ctc_lambda_func(args): y_pred, labels, input_length, label_length = args # the 2 is critical here since the first couple outputs of the RNN # tend to be garbage: y_pred = y_pred[:, 2:, :] return K.ctc_batch_cost(labels, y_pred, input_length, label_length) # For a real OCR application, this should be beam search with a dictionary # and language model. For this example, best path is sufficient. 
def decode_batch(test_func, word_batch): out = test_func([word_batch])[0] ret = [] for j in range(out.shape[0]): out_best = list(np.argmax(out[j, 2:], 1)) out_best = [k for k, g in itertools.groupby(out_best)] outstr = labels_to_text(out_best) ret.append(outstr) return ret class VizCallback(keras.callbacks.Callback): def __init__(self, run_name, test_func, text_img_gen, num_display_words=6): self.test_func = test_func self.output_dir = os.path.join( OUTPUT_DIR, run_name) self.text_img_gen = text_img_gen self.num_display_words = num_display_words if not os.path.exists(self.output_dir): os.makedirs(self.output_dir) def show_edit_distance(self, num): num_left = num mean_norm_ed = 0.0 mean_ed = 0.0 while num_left > 0: word_batch = next(self.text_img_gen)[0] num_proc = min(word_batch['the_input'].shape[0], num_left) decoded_res = decode_batch(self.test_func, word_batch['the_input'][0:num_proc]) for j in range(num_proc): edit_dist = editdistance.eval(decoded_res[j], word_batch['source_str'][j]) mean_ed += float(edit_dist) mean_norm_ed += float(edit_dist) / len(word_batch['source_str'][j]) num_left -= num_proc mean_norm_ed = mean_norm_ed / num mean_ed = mean_ed / num print('\nOut of %d samples: Mean edit distance:' '%.3f Mean normalized edit distance: %0.3f' % (num, mean_ed, mean_norm_ed)) def on_epoch_end(self, epoch, logs={}): self.model.save_weights( os.path.join(self.output_dir, 'weights%02d.h5' % (epoch))) self.show_edit_distance(256) word_batch = next(self.text_img_gen)[0] res = decode_batch(self.test_func, word_batch['the_input'][0:self.num_display_words]) if word_batch['the_input'][0].shape[0] < 256: cols = 2 else: cols = 1 for i in range(self.num_display_words): pylab.subplot(self.num_display_words // cols, cols, i + 1) if K.image_data_format() == 'channels_first': the_input = word_batch['the_input'][i, 0, :, :] else: the_input = word_batch['the_input'][i, :, :, 0] pylab.imshow(the_input.T, cmap='Greys_r') pylab.xlabel( 'Truth = \'%s\'\nDecoded = \'%s\'' % 
(word_batch['source_str'][i], res[i])) fig = pylab.gcf() fig.set_size_inches(10, 13) pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % (epoch))) pylab.close() def train(run_name, start_epoch, stop_epoch, img_w): # Input Parameters img_h = 64 words_per_epoch = 16000 val_split = 0.2 val_words = int(words_per_epoch * (val_split)) # Network parameters conv_filters = 16 kernel_size = (3, 3) pool_size = 2 time_dense_size = 32 rnn_size = 512 minibatch_size = 32 if K.image_data_format() == 'channels_first': input_shape = (1, img_w, img_h) else: input_shape = (img_w, img_h, 1) fdir = os.path.dirname( get_file('wordlists.tgz', origin='http://www.mythic-ai.com/datasets/wordlists.tgz', untar=True)) img_gen = TextImageGenerator( monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'), bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'), minibatch_size=minibatch_size, img_w=img_w, img_h=img_h, downsample_factor=(pool_size ** 2), val_split=words_per_epoch - val_words) act = 'relu' input_data = Input(name='the_input', shape=input_shape, dtype='float32') inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv1')(input_data) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner) inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv2')(inner) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner) conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters) inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner) # cuts down input size going into RNN: inner = Dense(time_dense_size, activation=act, name='dense1')(inner) # Two layers of bidirectional GRUs # GRU seems to work as well, if not better than LSTM: gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner) gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, 
kernel_initializer='he_normal', name='gru1_b')(inner) gru1_merged = add([gru_1, gru_1b]) gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged) gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged) # transforms RNN output to character activations: inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal', name='dense2')(concatenate([gru_2, gru_2b])) y_pred = Activation('softmax', name='softmax')(inner) Model(inputs=input_data, outputs=y_pred).summary() labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda( ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length]) # clipnorm seems to speeds up convergence sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) # the loss calc occurs elsewhere, so use a dummy lambda func for the loss model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) if start_epoch > 0: weight_file = os.path.join( OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1))) model.load_weights(weight_file) # captures output of softmax so we can decode the output during visualization test_func = K.function([input_data], [y_pred]) viz_cb = VizCallback(run_name, test_func, img_gen.next_val()) model.fit_generator( generator=img_gen.next_train(), steps_per_epoch=(words_per_epoch - val_words) // minibatch_size, epochs=stop_epoch, validation_data=img_gen.next_val(), validation_steps=val_words // minibatch_size, callbacks=[viz_cb, img_gen], 
def create_pairs(x, digit_indices, num_classes=None):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.

    # Arguments
        x: array of samples; it is indexed with the entries
            stored in `digit_indices`.
        digit_indices: sequence where `digit_indices[d]` holds the
            indices (into `x`) of the samples of class `d`.
        num_classes: number of classes to build pairs for.
            Defaults to `len(digit_indices)`, so the function no longer
            depends on a module-level global.

    # Returns
        A tuple `(pairs, labels)` of numpy arrays. `pairs[2 * k]` is a
        same-class pair (label 1) and `pairs[2 * k + 1]` is a pair made
        of the same first sample and a sample of another, randomly
        chosen class (label 0).

    # Raises
        ValueError: if fewer than two classes are requested, since no
            negative pair can be formed in that case.
    '''
    if num_classes is None:
        num_classes = len(digit_indices)
    if num_classes < 2:
        raise ValueError('create_pairs needs at least 2 classes to build '
                         'negative pairs, got %d' % num_classes)

    pairs = []
    labels = []
    # every class contributes the same number of pairs, bounded by the
    # smallest class; "- 1" because positive pairs use samples i and i + 1
    n = min(len(digit_indices[d]) for d in range(num_classes)) - 1
    for d in range(num_classes):
        for i in range(n):
            # positive pair: two consecutive samples of class d
            z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
            pairs.append([x[z1], x[z2]])
            # negative pair: a non-zero offset guarantees that dn != d
            inc = random.randrange(1, num_classes)
            dn = (d + inc) % num_classes
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs.append([x[z1], x[z2]])
            labels += [1, 0]
    return np.array(pairs), np.array(labels)
''' return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype))) # the data, split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 input_shape = x_train.shape[1:] # create training+test positive and negative pairs digit_indices = [np.where(y_train == i)[0] for i in range(num_classes)] tr_pairs, tr_y = create_pairs(x_train, digit_indices) digit_indices = [np.where(y_test == i)[0] for i in range(num_classes)] te_pairs, te_y = create_pairs(x_test, digit_indices) # network definition base_network = create_base_network(input_shape) input_a = Input(shape=input_shape) input_b = Input(shape=input_shape) # because we re-use the same instance `base_network`, # the weights of the network # will be shared across the two branches processed_a = base_network(input_a) processed_b = base_network(input_b) distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b]) model = Model([input_a, input_b], distance) # train rms = RMSprop() model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy]) model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y, batch_size=128, epochs=epochs, validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)) # compute final accuracy on training and test sets y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) tr_acc = compute_accuracy(tr_y, y_pred) y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]]) te_acc = compute_accuracy(te_y, y_pred) print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc)) print('* Accuracy on test set: %0.2f%%' % (100 * te_acc)) Keras-2.2.4/examples/mnist_acgan.py0000644000000000116100000003143013354530144016777 0ustar rooteng00000000000000# -*- coding: utf-8 -*- """ Train an Auxiliary Classifier Generative Adversarial Network (ACGAN) on the MNIST dataset. See https://arxiv.org/abs/1610.09585 for more details. 
def build_generator(latent_size):
    # The generator maps a pair (z, L) to image space (..., 28, 28, 1),
    # where z is a latent vector and L is a label drawn from P_c.
    glorot = 'glorot_normal'
    cnn = Sequential([
        Dense(3 * 3 * 384, input_dim=latent_size, activation='relu'),
        Reshape((3, 3, 384)),
        # upsample to (7, 7, ...)
        Conv2DTranspose(192, 5, strides=1, padding='valid',
                        activation='relu',
                        kernel_initializer=glorot),
        BatchNormalization(),
        # upsample to (14, 14, ...)
        Conv2DTranspose(96, 5, strides=2, padding='same',
                        activation='relu',
                        kernel_initializer=glorot),
        BatchNormalization(),
        # upsample to (28, 28, ...)
        Conv2DTranspose(1, 5, strides=2, padding='same',
                        activation='tanh',
                        kernel_initializer=glorot),
    ])

    # z space, as it is commonly called in GAN papers
    latent = Input(shape=(latent_size, ))

    # the class label the image is conditioned on
    image_class = Input(shape=(1,), dtype='int32')

    # embed the label into a vector of the same size as z
    embedded = Embedding(num_classes, latent_size,
                         embeddings_initializer=glorot)(image_class)
    cls = Flatten()(embedded)

    # hadamard product between z-space and the class conditional embedding
    conditioned = layers.multiply([latent, cls])

    return Model([latent, image_class], cnn(conditioned))
fake = Dense(1, activation='sigmoid', name='generation')(features) aux = Dense(num_classes, activation='softmax', name='auxiliary')(features) return Model(image, [fake, aux]) if __name__ == '__main__': # batch and latent size taken from the paper epochs = 100 batch_size = 100 latent_size = 100 # Adam parameters suggested in https://arxiv.org/abs/1511.06434 adam_lr = 0.0002 adam_beta_1 = 0.5 # build the discriminator print('Discriminator model:') discriminator = build_discriminator() discriminator.compile( optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1), loss=['binary_crossentropy', 'sparse_categorical_crossentropy'] ) discriminator.summary() # build the generator generator = build_generator(latent_size) latent = Input(shape=(latent_size, )) image_class = Input(shape=(1,), dtype='int32') # get a fake image fake = generator([latent, image_class]) # we only want to be able to train generation for the combined model discriminator.trainable = False fake, aux = discriminator(fake) combined = Model([latent, image_class], [fake, aux]) print('Combined model:') combined.compile( optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1), loss=['binary_crossentropy', 'sparse_categorical_crossentropy'] ) combined.summary() # get our mnist data, and force it to be of shape (..., 28, 28, 1) with # range [-1, 1] (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = (x_train.astype(np.float32) - 127.5) / 127.5 x_train = np.expand_dims(x_train, axis=-1) x_test = (x_test.astype(np.float32) - 127.5) / 127.5 x_test = np.expand_dims(x_test, axis=-1) num_train, num_test = x_train.shape[0], x_test.shape[0] train_history = defaultdict(list) test_history = defaultdict(list) for epoch in range(1, epochs + 1): print('Epoch {}/{}'.format(epoch, epochs)) num_batches = int(np.ceil(x_train.shape[0] / float(batch_size))) progress_bar = Progbar(target=num_batches) epoch_gen_loss = [] epoch_disc_loss = [] for index in range(num_batches): # get a batch of real images image_batch = x_train[index * 
batch_size:(index + 1) * batch_size] label_batch = y_train[index * batch_size:(index + 1) * batch_size] # generate a new batch of noise noise = np.random.uniform(-1, 1, (len(image_batch), latent_size)) # sample some labels from p_c sampled_labels = np.random.randint(0, num_classes, len(image_batch)) # generate a batch of fake images, using the generated labels as a # conditioner. We reshape the sampled labels to be # (len(image_batch), 1) so that we can feed them into the embedding # layer as a length one sequence generated_images = generator.predict( [noise, sampled_labels.reshape((-1, 1))], verbose=0) x = np.concatenate((image_batch, generated_images)) # use one-sided soft real/fake labels # Salimans et al., 2016 # https://arxiv.org/pdf/1606.03498.pdf (Section 3.4) soft_zero, soft_one = 0, 0.95 y = np.array( [soft_one] * len(image_batch) + [soft_zero] * len(image_batch)) aux_y = np.concatenate((label_batch, sampled_labels), axis=0) # we don't want the discriminator to also maximize the classification # accuracy of the auxilary classifier on generated images, so we # don't train discriminator to produce class labels for generated # images (see https://openreview.net/forum?id=rJXTf9Bxg). # To preserve sum of sample weights for the auxilary classifier, # we assign sample weight of 2 to the real images. disc_sample_weight = [np.ones(2 * len(image_batch)), np.concatenate((np.ones(len(image_batch)) * 2, np.zeros(len(image_batch))))] # see if the discriminator can figure itself out... epoch_disc_loss.append(discriminator.train_on_batch( x, [y, aux_y], sample_weight=disc_sample_weight)) # make new noise. 
we generate 2 * batch size here such that we have # the generator optimize over an identical number of images as the # discriminator noise = np.random.uniform(-1, 1, (2 * len(image_batch), latent_size)) sampled_labels = np.random.randint(0, num_classes, 2 * len(image_batch)) # we want to train the generator to trick the discriminator # For the generator, we want all the {fake, not-fake} labels to say # not-fake trick = np.ones(2 * len(image_batch)) * soft_one epoch_gen_loss.append(combined.train_on_batch( [noise, sampled_labels.reshape((-1, 1))], [trick, sampled_labels])) progress_bar.update(index + 1) print('Testing for epoch {}:'.format(epoch)) # evaluate the testing loss here # generate a new batch of noise noise = np.random.uniform(-1, 1, (num_test, latent_size)) # sample some labels from p_c and generate images from them sampled_labels = np.random.randint(0, num_classes, num_test) generated_images = generator.predict( [noise, sampled_labels.reshape((-1, 1))], verbose=False) x = np.concatenate((x_test, generated_images)) y = np.array([1] * num_test + [0] * num_test) aux_y = np.concatenate((y_test, sampled_labels), axis=0) # see if the discriminator can figure itself out... 
discriminator_test_loss = discriminator.evaluate( x, [y, aux_y], verbose=False) discriminator_train_loss = np.mean(np.array(epoch_disc_loss), axis=0) # make new noise noise = np.random.uniform(-1, 1, (2 * num_test, latent_size)) sampled_labels = np.random.randint(0, num_classes, 2 * num_test) trick = np.ones(2 * num_test) generator_test_loss = combined.evaluate( [noise, sampled_labels.reshape((-1, 1))], [trick, sampled_labels], verbose=False) generator_train_loss = np.mean(np.array(epoch_gen_loss), axis=0) # generate an epoch report on performance train_history['generator'].append(generator_train_loss) train_history['discriminator'].append(discriminator_train_loss) test_history['generator'].append(generator_test_loss) test_history['discriminator'].append(discriminator_test_loss) print('{0:<22s} | {1:4s} | {2:15s} | {3:5s}'.format( 'component', *discriminator.metrics_names)) print('-' * 65) ROW_FMT = '{0:<22s} | {1:<4.2f} | {2:<15.4f} | {3:<5.4f}' print(ROW_FMT.format('generator (train)', *train_history['generator'][-1])) print(ROW_FMT.format('generator (test)', *test_history['generator'][-1])) print(ROW_FMT.format('discriminator (train)', *train_history['discriminator'][-1])) print(ROW_FMT.format('discriminator (test)', *test_history['discriminator'][-1])) # save weights every epoch generator.save_weights( 'params_generator_epoch_{0:03d}.hdf5'.format(epoch), True) discriminator.save_weights( 'params_discriminator_epoch_{0:03d}.hdf5'.format(epoch), True) # generate some digits to display num_rows = 40 noise = np.tile(np.random.uniform(-1, 1, (num_rows, latent_size)), (num_classes, 1)) sampled_labels = np.array([ [i] * num_rows for i in range(num_classes) ]).reshape(-1, 1) # get a batch to display generated_images = generator.predict( [noise, sampled_labels], verbose=0) # prepare real images sorted by class label real_labels = y_train[(epoch - 1) * num_rows * num_classes: epoch * num_rows * num_classes] indices = np.argsort(real_labels, axis=0) real_images = 
def deprocess_image(x):
    '''Convert a tensor (numpy array) into a displayable uint8 image.

    The array is centered on 0 with a standard deviation of 0.1, shifted
    to 0.5, clipped to [0, 1] and scaled to [0, 255]. With the
    'channels_first' image data format the channel axis is moved last.

    # Arguments
        x: numpy array holding a single image. It is left unmodified.

    # Returns
        A new `uint8` numpy array.
    '''
    # Work on copies: the in-place operators used previously (`-=`, `/=`)
    # silently overwrote the caller's buffer, because the script passes a
    # view (`input_img_data[0]`) of its working array.
    x = x - x.mean()
    x = x / (x.std() + K.epsilon())

    # rescale to std 0.1, shift to 0.5 and clip to [0, 1]
    x = np.clip(x * 0.1 + 0.5, 0, 1)

    # convert to RGB array; values already lie in [0, 255] after the clip
    # above, so no second clip is needed
    x = x * 255
    if K.image_data_format() == 'channels_first':
        x = x.transpose((1, 2, 0))
    return x.astype('uint8')
def normalize(x):
    # utility function to normalize a tensor by its root-mean-square value
    # (square root of the mean of the squared entries). Note that this is
    # not the plain L2 norm, which would use a sum instead of a mean.
    # K.epsilon() is added to the denominator to avoid a division by zero.
    return x / (K.sqrt(K.mean(K.square(x))) + K.epsilon())
kept_filters.sort(key=lambda x: x[1], reverse=True) kept_filters = kept_filters[:n * n] # build a black picture with enough space for # our 8 x 8 filters of size 128 x 128, with a 5px margin in between margin = 5 width = n * img_width + (n - 1) * margin height = n * img_height + (n - 1) * margin stitched_filters = np.zeros((width, height, 3)) # fill the picture with our saved filters for i in range(n): for j in range(n): img, loss = kept_filters[i * n + j] width_margin = (img_width + margin) * i height_margin = (img_height + margin) * j stitched_filters[ width_margin: width_margin + img_width, height_margin: height_margin + img_height, :] = img # save the result to disk save_img('stitched_filters_%dx%d.png' % (n, n), stitched_filters) Keras-2.2.4/examples/mnist_net2net.py0000644000000000116100000003774113342055016017315 0ustar rooteng00000000000000'''This is an implementation of Net2Net experiment with MNIST in 'Net2Net: Accelerating Learning via Knowledge Transfer' by Tianqi Chen, Ian Goodfellow, and Jonathon Shlens arXiv:1511.05641v4 [cs.LG] 23 Apr 2016 http://arxiv.org/abs/1511.05641 # Notes - What: + Net2Net is a group of methods to transfer knowledge from a teacher neural net to a student net,so that the student net can be trained faster than from scratch. + The paper discussed two specific methods of Net2Net, i.e. Net2WiderNet and Net2DeeperNet. + Net2WiderNet replaces a model with an equivalent wider model that has more units in each hidden layer. + Net2DeeperNet replaces a model with an equivalent deeper model. + Both are based on the idea of 'function-preserving transformations of neural nets'. - Why: + Enable fast exploration of multiple neural nets in experimentation and design process,by creating a series of wider and deeper models with transferable knowledge. + Enable 'lifelong learning system' by gradually adjusting model complexity to data availability,and reusing transferable knowledge. 
# Experiments - Teacher model: a basic CNN model trained on MNIST for 3 epochs. - Net2WiderNet experiment: + Student model has a wider Conv2D layer and a wider FC layer. + Comparison of 'random-padding' vs 'net2wider' weight initialization. + With both methods, after 1 epoch, student model should perform as well as teacher model, but 'net2wider' is slightly better. - Net2DeeperNet experiment: + Student model has an extra Conv2D layer and an extra FC layer. + Comparison of 'random-init' vs 'net2deeper' weight initialization. + After 1 epoch, performance of 'net2deeper' is better than 'random-init'. - Hyper-parameters: + SGD with momentum=0.9 is used for training teacher and student models. + Learning rate adjustment: it's suggested to reduce learning rate to 1/10 for student model. + Addition of noise in 'net2wider' is used to break weight symmetry and thus enable full capacity of student models. It is optional when a Dropout layer is used. # Results - Tested with TF backend and 'channels_last' image_data_format. - Running on GPU GeForce GTX Titan X Maxwell - Performance Comparisons - validation loss values during first 3 epochs: Teacher model ... (0) teacher_model: 0.0537 0.0354 0.0356 Experiment of Net2WiderNet ... (1) wider_random_pad: 0.0320 0.0317 0.0289 (2) wider_net2wider: 0.0271 0.0274 0.0270 Experiment of Net2DeeperNet ... 
def wider2net_conv2d(teacher_w1, teacher_b1, teacher_w2, new_width, init):
    '''Get initial weights for a wider conv2d layer with a bigger filters,
    by 'random-padding' or 'net2wider'.

    # Arguments
        teacher_w1: `weight` of conv2d layer to become wider,
          of shape (kh1, kw1, num_channel1, filters1)
        teacher_b1: `bias` of conv2d layer to become wider,
          of shape (filters1, )
        teacher_w2: `weight` of next connected conv2d layer,
          of shape (kh2, kw2, num_channel2, filters2),
          with num_channel2 == filters1
        new_width: new `filters` for the wider conv2d layer
        init: initialization algorithm for new weights,
          either 'random-pad' or 'net2wider'

    # Returns
        A tuple `(student_w1, student_b1, student_w2)`: the widened kernel
        and bias of the first layer, and the kernel of the next layer
        extended along its input-channel axis to `new_width`.

    # Raises
        AssertionError: if the teacher shapes are inconsistent or
            `new_width` is not larger than the current width.
        ValueError: if `init` is not a supported initializer.
    '''
    # The output filters of the first layer (last axis) feed the input
    # channels of the next one (axis 2). The previous check compared the
    # kernel height of w1 with the kernel width of w2 instead, which
    # rejected valid non-square kernels and let incompatible layers pass.
    assert teacher_w1.shape[3] == teacher_w2.shape[2], (
        'successive layers from teacher model should have compatible shapes')
    assert teacher_w1.shape[3] == teacher_b1.shape[0], (
        'weight and bias from same layer should have compatible shapes')
    assert new_width > teacher_w1.shape[3], (
        'new width (filters) should be bigger than the existing one')

    n = new_width - teacher_w1.shape[3]
    if init == 'random-pad':
        new_w1 = np.random.normal(0, 0.1, size=teacher_w1.shape[:3] + (n,))
        new_b1 = np.ones(n) * 0.1
        new_w2 = np.random.normal(
            0, 0.1,
            size=teacher_w2.shape[:2] + (n, teacher_w2.shape[3]))
    elif init == 'net2wider':
        # pick, with replacement, the existing filters to replicate
        index = np.random.randint(teacher_w1.shape[3], size=n)
        # a filter replicated c times ends up with c + 1 copies, so the
        # outgoing weights of every copy are divided by c + 1
        factors = np.bincount(index)[index] + 1.
        new_w1 = teacher_w1[:, :, :, index]
        new_b1 = teacher_b1[index]
        new_w2 = teacher_w2[:, :, index, :] / factors.reshape((1, 1, -1, 1))
    else:
        raise ValueError('Unsupported weight initializer: %s' % init)

    student_w1 = np.concatenate((teacher_w1, new_w1), axis=3)
    if init == 'random-pad':
        student_w2 = np.concatenate((teacher_w2, new_w2), axis=2)
    elif init == 'net2wider':
        # add small noise to break symmetry, so that student model will have
        # full capacity later
        noise = np.random.normal(0, 5e-2 * new_w2.std(), size=new_w2.shape)
        student_w2 = np.concatenate((teacher_w2, new_w2 + noise), axis=2)
        # the original (replicated) channels get the rescaled weights
        student_w2[:, :, index, :] = new_w2
    student_b1 = np.concatenate((teacher_b1, new_b1), axis=0)

    return student_w1, student_b1, student_w2
def deeper2net_conv2d(teacher_w):
    '''Get initial weights for a deeper conv2d layer by 'net2deeper'.

    The returned kernel is all zeros except for a one at the spatial
    center of entry (i, i) for every filter i, and the bias is zero.

    # Arguments
        teacher_w: `weight` of previous conv2d layer,
          of shape (kh, kw, num_channel, filters)

    # Returns
        A tuple `(student_w, student_b)`; `student_w` has the shape and
        dtype of `teacher_w`, `student_b` has shape (filters, ).
    '''
    height, width, _, n_filters = teacher_w.shape
    center_row = (height - 1) // 2
    center_col = (width - 1) // 2

    student_w = np.zeros_like(teacher_w)
    # set all the (i, i) center taps at once
    diagonal = np.arange(n_filters)
    student_w[center_row, center_col, diagonal, diagonal] = 1.

    return student_w, np.zeros(n_filters)
def make_wider_student_model(teacher_model,
                             x_train, y_train,
                             x_test, y_test,
                             init, epochs):
    '''Train a wider student model based on teacher_model,
       with either 'random-pad' (baseline) or 'net2wider'

    # Arguments
        teacher_model: model providing the weights of the layers named
            'conv1', 'conv2', 'fc1' and 'fc2'.
        x_train, y_train: training data passed to `fit`.
        x_test, y_test: data passed to `fit` as `validation_data`.
        init: weight initialization algorithm, forwarded to
            `wider2net_conv2d` and `wider2net_fc`.
        epochs: number of training epochs.
    '''
    new_conv1_width = 128
    new_fc1_width = 128

    # same topology as the teacher, except for a wider conv1 and a wider fc1
    model = Sequential([
        Conv2D(new_conv1_width, 3, input_shape=input_shape,
               padding='same', name='conv1'),
        MaxPooling2D(2, name='pool1'),
        Conv2D(64, 3, padding='same', name='conv2'),
        MaxPooling2D(2, name='pool2'),
        Flatten(name='flatten'),
        Dense(new_fc1_width, activation='relu', name='fc1'),
        Dense(num_classes, activation='softmax', name='fc2'),
    ])

    # Only the widened layers and their immediate downstream layers carry
    # weights here, and both are initialized from the teacher below, so no
    # other layer needs its weights copied in this example.
    # Each entry: (widened layer, downstream layer, transfer function, width)
    widenings = (
        ('conv1', 'conv2', wider2net_conv2d, new_conv1_width),
        ('fc1', 'fc2', wider2net_fc, new_fc1_width),
    )
    for widened, downstream, widen, width in widenings:
        w1, b1 = teacher_model.get_layer(widened).get_weights()
        w2, b2 = teacher_model.get_layer(downstream).get_weights()
        wide_w1, wide_b1, wide_w2 = widen(w1, b1, w2, width, init)
        model.get_layer(widened).set_weights([wide_w1, wide_b1])
        # the downstream bias is reused unchanged
        model.get_layer(downstream).set_weights([wide_w2, b2])

    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.001, momentum=0.9),
                  metrics=['accuracy'])
    model.fit(x_train, y_train,
              epochs=epochs,
              validation_data=(x_test, y_test))
def net2wider_experiment():
    '''Benchmark performances of
    (1) a wider student model with `random_pad` initializer
    (2) a wider student model with `Net2WiderNet` initializer
    '''
    print('\nExperiment of Net2WiderNet ...')

    # (banner printed before the run, initializer used for the run)
    runs = (
        ('\n(1) building wider student model by random padding ...',
         'random-pad'),
        ('\n(2) building wider student model by net2wider ...',
         'net2wider'),
    )
    for banner, initializer in runs:
        print(banner)
        make_wider_student_model(teacher_model,
                                 x_train, y_train,
                                 x_test, y_test,
                                 init=initializer,
                                 epochs=epochs)
'''Train a recurrent convolutional network on the IMDB sentiment
classification task.

Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU.
'''
from __future__ import print_function

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Conv1D, MaxPooling1D
from keras.datasets import imdb

# Embedding
max_features = 20000
maxlen = 100
embedding_size = 128

# Convolution
kernel_size = 5
filters = 64
pool_size = 4

# LSTM
lstm_output_size = 70

# Training
# Note: batch_size is highly sensitive.
# Only 2 epochs are needed as the dataset is very small.
batch_size = 30
epochs = 2

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')

# embedding -> dropout -> 1D convolution -> max pooling -> LSTM -> sigmoid
model = Sequential([
    Embedding(max_features, embedding_size, input_length=maxlen),
    Dropout(0.25),
    Conv1D(filters,
           kernel_size,
           padding='valid',
           activation='relu',
           strides=1),
    MaxPooling1D(pool_size=pool_size),
    LSTM(lstm_output_size),
    Dense(1),
    Activation('sigmoid'),
])

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print('Train...')
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test))

# `evaluate` yields the loss followed by the compiled metric (accuracy)
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
Keras-2.2.4/examples/reuters_mlp_relu_vs_selu.py0000644000000000116100000001276413202372135021651 0ustar rooteng00000000000000'''Compares self-normalizing MLPs with regular MLPs. Compares the performance of a simple MLP using two different activation functions: RELU and SELU on the Reuters newswire topic classification task. # Reference - Klambauer, G., Unterthiner, T., Mayr, A., & Hochreiter, S. (2017). Self-Normalizing Neural Networks. arXiv preprint arXiv:1706.02515. https://arxiv.org/abs/1706.02515 ''' from __future__ import print_function import numpy as np import matplotlib.pyplot as plt import keras from keras.datasets import reuters from keras.models import Sequential from keras.layers import Dense, Activation, Dropout from keras.layers.noise import AlphaDropout from keras.preprocessing.text import Tokenizer max_words = 1000 batch_size = 16 epochs = 40 plot = True def create_network(n_dense=6, dense_units=16, activation='selu', dropout=AlphaDropout, dropout_rate=0.1, kernel_initializer='lecun_normal', optimizer='adam', num_classes=1, max_words=max_words): """Generic function to create a fully-connected neural network. # Arguments n_dense: int > 0. Number of dense layers. dense_units: int > 0. Number of dense units per layer. dropout: keras.layers.Layer. A dropout layer to apply. dropout_rate: 0 <= float <= 1. The rate of dropout. kernel_initializer: str. The initializer for the weights. optimizer: str/keras.optimizers.Optimizer. The optimizer to use. num_classes: int > 0. The number of classes to predict. max_words: int > 0. The maximum number of words per data point. # Returns A Keras model instance (compiled). 
""" model = Sequential() model.add(Dense(dense_units, input_shape=(max_words,), kernel_initializer=kernel_initializer)) model.add(Activation(activation)) model.add(dropout(dropout_rate)) for i in range(n_dense - 1): model.add(Dense(dense_units, kernel_initializer=kernel_initializer)) model.add(Activation(activation)) model.add(dropout(dropout_rate)) model.add(Dense(num_classes)) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) return model network1 = { 'n_dense': 6, 'dense_units': 16, 'activation': 'relu', 'dropout': Dropout, 'dropout_rate': 0.5, 'kernel_initializer': 'glorot_uniform', 'optimizer': 'sgd' } network2 = { 'n_dense': 6, 'dense_units': 16, 'activation': 'selu', 'dropout': AlphaDropout, 'dropout_rate': 0.1, 'kernel_initializer': 'lecun_normal', 'optimizer': 'sgd' } print('Loading data...') (x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words, test_split=0.2) print(len(x_train), 'train sequences') print(len(x_test), 'test sequences') num_classes = np.max(y_train) + 1 print(num_classes, 'classes') print('Vectorizing sequence data...') tokenizer = Tokenizer(num_words=max_words) x_train = tokenizer.sequences_to_matrix(x_train, mode='binary') x_test = tokenizer.sequences_to_matrix(x_test, mode='binary') print('x_train shape:', x_train.shape) print('x_test shape:', x_test.shape) print('Convert class vector to binary class matrix ' '(for use with categorical_crossentropy)') y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) print('y_train shape:', y_train.shape) print('y_test shape:', y_test.shape) print('\nBuilding network 1...') model1 = create_network(num_classes=num_classes, **network1) history_model1 = model1.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0.1) score_model1 = model1.evaluate(x_test, y_test, batch_size=batch_size, verbose=1) 
print('\nBuilding network 2...') model2 = create_network(num_classes=num_classes, **network2) history_model2 = model2.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0.1) score_model2 = model2.evaluate(x_test, y_test, batch_size=batch_size, verbose=1) print('\nNetwork 1 results') print('Hyperparameters:', network1) print('Test score:', score_model1[0]) print('Test accuracy:', score_model1[1]) print('Network 2 results') print('Hyperparameters:', network2) print('Test score:', score_model2[0]) print('Test accuracy:', score_model2[1]) plt.plot(range(epochs), history_model1.history['val_loss'], 'g-', label='Network 1 Val Loss') plt.plot(range(epochs), history_model2.history['val_loss'], 'r-', label='Network 2 Val Loss') plt.plot(range(epochs), history_model1.history['loss'], 'g--', label='Network 1 Loss') plt.plot(range(epochs), history_model2.history['loss'], 'r--', label='Network 2 Loss') plt.xlabel('Epochs') plt.ylabel('Loss') plt.legend() plt.savefig('comparison_of_networks.png') Keras-2.2.4/examples/mnist_transfer_cnn.py0000644000000000116100000000704613247612467020430 0ustar rooteng00000000000000'''Transfer learning toy example. 1 - Train a simple convnet on the MNIST dataset the first 5 digits [0..4]. 2 - Freeze convolutional layers and fine-tune dense layers for the classification of digits [5..9]. Get to 99.8% test accuracy after 5 epochs for the first five digits classifier and 99.2% for the last five digits after transfer + fine-tuning. 
''' from __future__ import print_function import datetime import keras from keras.datasets import mnist from keras.models import Sequential from keras.layers import Dense, Dropout, Activation, Flatten from keras.layers import Conv2D, MaxPooling2D from keras import backend as K now = datetime.datetime.now batch_size = 128 num_classes = 5 epochs = 5 # input image dimensions img_rows, img_cols = 28, 28 # number of convolutional filters to use filters = 32 # size of pooling area for max pooling pool_size = 2 # convolution kernel size kernel_size = 3 if K.image_data_format() == 'channels_first': input_shape = (1, img_rows, img_cols) else: input_shape = (img_rows, img_cols, 1) def train_model(model, train, test, num_classes): x_train = train[0].reshape((train[0].shape[0],) + input_shape) x_test = test[0].reshape((test[0].shape[0],) + input_shape) x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(train[1], num_classes) y_test = keras.utils.to_categorical(test[1], num_classes) model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy']) t = now() model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) print('Training time: %s' % (now() - t)) score = model.evaluate(x_test, y_test, verbose=0) print('Test score:', score[0]) print('Test accuracy:', score[1]) # the data, split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() # create two datasets one with digits below 5 and one with 5 and above x_train_lt5 = x_train[y_train < 5] y_train_lt5 = y_train[y_train < 5] x_test_lt5 = x_test[y_test < 5] y_test_lt5 = y_test[y_test < 5] x_train_gte5 = x_train[y_train >= 5] y_train_gte5 = y_train[y_train >= 5] - 5 
x_test_gte5 = x_test[y_test >= 5] y_test_gte5 = y_test[y_test >= 5] - 5 # define two groups of layers: feature (convolutions) and classification (dense) feature_layers = [ Conv2D(filters, kernel_size, padding='valid', input_shape=input_shape), Activation('relu'), Conv2D(filters, kernel_size), Activation('relu'), MaxPooling2D(pool_size=pool_size), Dropout(0.25), Flatten(), ] classification_layers = [ Dense(128), Activation('relu'), Dropout(0.5), Dense(num_classes), Activation('softmax') ] # create complete model model = Sequential(feature_layers + classification_layers) # train model for 5-digit classification [0..4] train_model(model, (x_train_lt5, y_train_lt5), (x_test_lt5, y_test_lt5), num_classes) # freeze feature layers and rebuild model for l in feature_layers: l.trainable = False # transfer: train dense layers for new classification task [5..9] train_model(model, (x_train_gte5, y_train_gte5), (x_test_gte5, y_test_gte5), num_classes) Keras-2.2.4/examples/mnist_swwae.py0000644000000000116100000001710213247612467017066 0ustar rooteng00000000000000'''Trains a stacked what-where autoencoder built on residual blocks on the MNIST dataset. It exemplifies two influential methods that have been developed in the past few years. The first is the idea of properly 'unpooling.' During any max pool, the exact location (the 'where') of the maximal value in a pooled receptive field is lost, however it can be very useful in the overall reconstruction of an input image. Therefore, if the 'where' is handed from the encoder to the corresponding decoder layer, features being decoded can be 'placed' in the right location, allowing for reconstructions of much higher fidelity. 
# References

- Visualizing and Understanding Convolutional Networks
  Matthew D Zeiler, Rob Fergus
  https://arxiv.org/abs/1311.2901v3
- Stacked What-Where Auto-encoders
  Junbo Zhao, Michael Mathieu, Ross Goroshin, Yann LeCun
  https://arxiv.org/abs/1506.02351v8

The second idea exploited here is that of residual learning. Residual blocks
ease the training process by allowing skip connections that give the network
the ability to be as linear (or non-linear) as the data sees fit. This allows
for much deeper networks to be easily trained. The residual element seems to
be advantageous in the context of this example as it allows a nice symmetry
between the encoder and decoder. Normally, in the decoder, the final
projection to the space where the image is reconstructed is linear, however
this does not have to be the case for a residual block as the degree to which
its output is linear or non-linear is determined by the data it is fed.
However, in order to cap the reconstruction in this example, a hard sigmoid is
applied as the final activation because we know the MNIST digits are mapped to
[0, 1].

# References

- Deep Residual Learning for Image Recognition
  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  https://arxiv.org/abs/1512.03385v1
- Identity Mappings in Deep Residual Networks
  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  https://arxiv.org/abs/1603.05027v3
'''
from __future__ import print_function
import numpy as np

from keras.datasets import mnist
from keras.models import Model
from keras.layers import Activation
from keras.layers import UpSampling2D, Conv2D, MaxPooling2D
from keras.layers import Input, BatchNormalization, ELU
import matplotlib.pyplot as plt
import keras.backend as K
from keras import layers


def convresblock(x, nfeats=8, ksize=3, nskipped=2, elu=True):
    """The proposed residual block from [4].

    Running with elu=True will use ELU nonlinearity and running with
    elu=False will use BatchNorm + RELU nonlinearity.  While ELU's are fast
    due to the fact they do not suffer from BatchNorm overhead, they may
    overfit because they do not offer the stochastic element of the batch
    formation process of BatchNorm, which acts as a good regularizer.

    The block applies one `ksize` x `ksize` convolution, then `nskipped`
    rounds of (nonlinearity, 1 x 1 convolution), and adds the output of the
    first convolution back onto the result.

    # Arguments
        x: 4D tensor, the tensor to feed through the block
        nfeats: Integer, number of feature maps for conv layers.
        ksize: Integer, width and height of conv kernels in first convolution.
        nskipped: Integer, number of conv layers for the residual function.
        elu: Boolean, whether to use ELU or BN+RELU.

    # Input shape
        4D tensor with shape:
        `(batch, channels, rows, cols)`

    # Output shape
        4D tensor with shape:
        `(batch, filters, rows, cols)`

    # Returns
        The tensor produced by `layers.add([y0, y])`, i.e. the first
        convolution's output plus the residual branch.
    """
    # y0 is both the input of the residual branch and the skip connection.
    y0 = Conv2D(nfeats, ksize, padding='same')(x)
    y = y0
    for i in range(nskipped):
        if elu:
            y = ELU()(y)
        else:
            # axis=1 is the channel axis for the (batch, channels, rows, cols)
            # layout documented above.
            y = BatchNormalization(axis=1)(y)
            y = Activation('relu')(y)
        y = Conv2D(nfeats, 1, padding='same')(y)
    return layers.add([y0, y])


def getwhere(x):
    ''' Calculate the 'where' mask that contains switches indicating which
    index contained the max value when MaxPool2D was applied.  Using the
    gradient of the sum is a nice trick to keep everything high level.

    # Arguments
        x: pair `(y_prepool, y_postpool)` holding the tensor fed to the
            pooling layer and the tensor it produced.

    # Returns
        The result of `K.gradients` of the summed pooled tensor with
        respect to the pre-pool tensor.
    '''
    y_prepool, y_postpool = x
    return K.gradients(K.sum(y_postpool), y_prepool)

if K.backend() == 'tensorflow':
    raise RuntimeError('This example can only run with the '
                       'Theano backend for the time being, '
                       'because it requires taking the gradient '
                       'of a gradient, which isn\'t '
                       'supported for all TensorFlow ops.')

# This example assumes the 'channels_first' data format.
K.set_image_data_format('channels_first')

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
# (labels are discarded: an autoencoder is trained to reproduce its input)
(x_train, _), (x_test, _) = mnist.load_data()

x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# The size of the kernel used for the MaxPooling2D
pool_size = 2
# The total number of feature maps at each layer
nfeats = [8, 16, 32, 64, 128]
# The sizes of the pooling kernel at each layer
pool_sizes = np.array([1, 1, 1, 1, 1]) * pool_size
# The convolution kernel size
ksize = 3
# Number of epochs to train for
epochs = 5
# Batch size during training
batch_size = 128

if pool_size == 2:
    # if using a 5 layer net of pool_size = 2
    # (zero-pad 28 -> 32 pixels so that five 2x poolings divide evenly)
    x_train = np.pad(x_train, [[0, 0], [0, 0], [2, 2], [2, 2]],
                     mode='constant')
    x_test = np.pad(x_test, [[0, 0], [0, 0], [2, 2], [2, 2]], mode='constant')
    nlayers = 5
elif pool_size == 3:
    # if using a 3 layer net of pool_size = 3
    # (crop 28 -> 27 pixels so that three 3x poolings divide evenly)
    x_train = x_train[:, :, :-1, :-1]
    x_test = x_test[:, :, :-1, :-1]
    nlayers = 3
else:
    import sys
    sys.exit('Script supports pool_size of 2 and 3.')

# Shape of input to train on (note that model is fully convolutional however)
input_shape = x_train.shape[1:]
# The final list of the size of axis=1 for all layers, including input
nfeats_all = [input_shape[0]] + nfeats

# First build the encoder, all the while keeping track of the 'where' masks
img_input = Input(shape=input_shape)

# We push the 'where' masks to the following list
wheres = [None] * nlayers
y = img_input
for i in range(nlayers):
    y_prepool = convresblock(y, nfeats=nfeats_all[i + 1], ksize=ksize)
    y = MaxPooling2D(pool_size=(pool_sizes[i], pool_sizes[i]))(y_prepool)
    # The mask is declared to have the shape of the pre-pool tensor (x[0]).
    wheres[i] = layers.Lambda(
        getwhere, output_shape=lambda x: x[0])([y_prepool, y])

# Now build the decoder, and use the stored 'where' masks to place the features
for i in range(nlayers):
    # Walk the encoder levels in reverse order.
    ind = nlayers - 1 - i
    y = UpSampling2D(size=(pool_sizes[ind], pool_sizes[ind]))(y)
    y = layers.multiply([y, wheres[ind]])
    y = convresblock(y, nfeats=nfeats_all[ind], ksize=ksize)

# Use hard_sigmoid to clip range of reconstruction
y = Activation('hard_sigmoid')(y)

# Define the model and its mean squared error loss, and compile it with Adam
model = Model(img_input, y)
model.compile('adam', 'mse')

# Fit the model (the input is also the target)
model.fit(x_train, x_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, x_test))

# Plot: tile the first 25 test images and their reconstructions into one image
x_recon = model.predict(x_test[:25])
x_plot = np.concatenate((x_test[:25], x_recon), axis=1)
x_plot = x_plot.reshape((5, 10, input_shape[-2], input_shape[-1]))
x_plot = np.vstack([np.hstack(x) for x in x_plot])
plt.figure()
plt.axis('off')
plt.title('Test Samples: Originals/Reconstructions')
plt.imshow(x_plot, interpolation='none', cmap='gray')
plt.savefig('reconstructions.png')
Keras-2.2.4/examples/cifar10_cnn.py0000644000000000116100000001127213326715636016613 0ustar rooteng00000000000000'''Train a simple deep CNN on the CIFAR10 small images dataset.

It gets to 75% validation accuracy in 25 epochs, and 79% after 50 epochs.
(it's still underfitting at that point, though).
''' from __future__ import print_function import keras from keras.datasets import cifar10 from keras.preprocessing.image import ImageDataGenerator from keras.models import Sequential from keras.layers import Dense, Dropout, Activation, Flatten from keras.layers import Conv2D, MaxPooling2D import os batch_size = 32 num_classes = 10 epochs = 100 data_augmentation = True num_predictions = 20 save_dir = os.path.join(os.getcwd(), 'saved_models') model_name = 'keras_cifar10_trained_model.h5' # The data, split between train and test sets: (x_train, y_train), (x_test, y_test) = cifar10.load_data() print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # Convert class vectors to binary class matrices. y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) model = Sequential() model.add(Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:])) model.add(Activation('relu')) model.add(Conv2D(32, (3, 3))) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Conv2D(64, (3, 3), padding='same')) model.add(Activation('relu')) model.add(Conv2D(64, (3, 3))) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(512)) model.add(Activation('relu')) model.add(Dropout(0.5)) model.add(Dense(num_classes)) model.add(Activation('softmax')) # initiate RMSprop optimizer opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6) # Let's train the model using RMSprop model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 if not data_augmentation: print('Not using data augmentation.') model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), shuffle=True) else: 
print('Using real-time data augmentation.') # This will do preprocessing and realtime data augmentation: datagen = ImageDataGenerator( featurewise_center=False, # set input mean to 0 over the dataset samplewise_center=False, # set each sample mean to 0 featurewise_std_normalization=False, # divide inputs by std of the dataset samplewise_std_normalization=False, # divide each input by its std zca_whitening=False, # apply ZCA whitening zca_epsilon=1e-06, # epsilon for ZCA whitening rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) # randomly shift images horizontally (fraction of total width) width_shift_range=0.1, # randomly shift images vertically (fraction of total height) height_shift_range=0.1, shear_range=0., # set range for random shear zoom_range=0., # set range for random zoom channel_shift_range=0., # set range for random channel shifts # set mode for filling points outside the input boundaries fill_mode='nearest', cval=0., # value used for fill_mode = "constant" horizontal_flip=True, # randomly flip images vertical_flip=False, # randomly flip images # set rescaling factor (applied before any other transformation) rescale=None, # set function that will be applied on each input preprocessing_function=None, # image data format, either "channels_first" or "channels_last" data_format=None, # fraction of images reserved for validation (strictly between 0 and 1) validation_split=0.0) # Compute quantities required for feature-wise normalization # (std, mean, and principal components if ZCA whitening is applied). datagen.fit(x_train) # Fit the model on the batches generated by datagen.flow(). 
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), epochs=epochs, validation_data=(x_test, y_test), workers=4) # Save model and weights if not os.path.isdir(save_dir): os.makedirs(save_dir) model_path = os.path.join(save_dir, model_name) model.save(model_path) print('Saved trained model at %s ' % model_path) # Score trained model. scores = model.evaluate(x_test, y_test, verbose=1) print('Test loss:', scores[0]) print('Test accuracy:', scores[1]) Keras-2.2.4/examples/mnist_dataset_api.py0000644000000000116100000000707113267437307020222 0ustar rooteng00000000000000'''MNIST classification with TensorFlow's Dataset API. Introduced in TensorFlow 1.3, the Dataset API is now the standard method for loading data into TensorFlow models. A Dataset is a sequence of elements, which are themselves composed of tf.Tensor components. For more details, see: https://www.tensorflow.org/programmers_guide/datasets To use this with Keras, we make a dataset out of elements of the form (input batch, output batch). From there, we create a one-shot iterator and a graph node corresponding to its get_next() method. Its components are then provided to the network's Input layer and the Model.compile() method, respectively. This example is intended to closely follow the mnist_tfrecord.py example. 
''' import numpy as np import os import tempfile import keras from keras import backend as K from keras import layers from keras.datasets import mnist import tensorflow as tf if K.backend() != 'tensorflow': raise RuntimeError('This example can only run with the TensorFlow backend,' ' because it requires the Datset API, which is not' ' supported on other platforms.') def cnn_layers(inputs): x = layers.Conv2D(32, (3, 3), activation='relu', padding='valid')(inputs) x = layers.MaxPooling2D(pool_size=(2, 2))(x) x = layers.Conv2D(64, (3, 3), activation='relu')(x) x = layers.MaxPooling2D(pool_size=(2, 2))(x) x = layers.Flatten()(x) x = layers.Dense(512, activation='relu')(x) x = layers.Dropout(0.5)(x) predictions = layers.Dense(num_classes, activation='softmax', name='x_train_out')(x) return predictions batch_size = 128 buffer_size = 10000 steps_per_epoch = int(np.ceil(60000 / float(batch_size))) # = 469 epochs = 5 num_classes = 10 (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = x_train.astype(np.float32) / 255 x_train = np.expand_dims(x_train, -1) y_train = tf.one_hot(y_train, num_classes) # Create the dataset and its associated one-shot iterator. dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) dataset = dataset.repeat() dataset = dataset.shuffle(buffer_size) dataset = dataset.batch(batch_size) iterator = dataset.make_one_shot_iterator() # Model creation using tensors from the get_next() graph node. inputs, targets = iterator.get_next() model_input = layers.Input(tensor=inputs) model_output = cnn_layers(model_input) train_model = keras.models.Model(inputs=model_input, outputs=model_output) train_model.compile(optimizer=keras.optimizers.RMSprop(lr=2e-3, decay=1e-5), loss='categorical_crossentropy', metrics=['accuracy'], target_tensors=[targets]) train_model.summary() train_model.fit(epochs=epochs, steps_per_epoch=steps_per_epoch) # Save the model weights. 
weight_path = os.path.join(tempfile.gettempdir(), 'saved_wt.h5') train_model.save_weights(weight_path) # Clean up the TF session. K.clear_session() # Second session to test loading trained model without tensors. x_test = x_test.astype(np.float32) x_test = np.expand_dims(x_test, -1) x_test_inp = layers.Input(shape=x_test.shape[1:]) test_out = cnn_layers(x_test_inp) test_model = keras.models.Model(inputs=x_test_inp, outputs=test_out) test_model.load_weights(weight_path) test_model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy']) test_model.summary() loss, acc = test_model.evaluate(x_test, y_test, num_classes) print('\nTest accuracy: {0}'.format(acc)) Keras-2.2.4/examples/conv_lstm.py0000644000000000116100000001172013202372135016514 0ustar rooteng00000000000000""" This script demonstrates the use of a convolutional LSTM network. This network is used to predict the next frame of an artificially generated movie which contains moving squares. """ from keras.models import Sequential from keras.layers.convolutional import Conv3D from keras.layers.convolutional_recurrent import ConvLSTM2D from keras.layers.normalization import BatchNormalization import numpy as np import pylab as plt # We create a layer which take as input movies of shape # (n_frames, width, height, channels) and returns a movie # of identical shape. 
seq = Sequential() seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), input_shape=(None, 40, 40, 1), padding='same', return_sequences=True)) seq.add(BatchNormalization()) seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), padding='same', return_sequences=True)) seq.add(BatchNormalization()) seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), padding='same', return_sequences=True)) seq.add(BatchNormalization()) seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), padding='same', return_sequences=True)) seq.add(BatchNormalization()) seq.add(Conv3D(filters=1, kernel_size=(3, 3, 3), activation='sigmoid', padding='same', data_format='channels_last')) seq.compile(loss='binary_crossentropy', optimizer='adadelta') # Artificial data generation: # Generate movies with 3 to 7 moving squares inside. # The squares are of shape 1x1 or 2x2 pixels, # which move linearly over time. # For convenience we first create movies with bigger width and height (80x80) # and at the end we select a 40x40 window. def generate_movies(n_samples=1200, n_frames=15): row = 80 col = 80 noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float) shifted_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float) for i in range(n_samples): # Add 3 to 7 moving squares n = np.random.randint(3, 8) for j in range(n): # Initial position xstart = np.random.randint(20, 60) ystart = np.random.randint(20, 60) # Direction of motion directionx = np.random.randint(0, 3) - 1 directiony = np.random.randint(0, 3) - 1 # Size of the square w = np.random.randint(2, 4) for t in range(n_frames): x_shift = xstart + directionx * t y_shift = ystart + directiony * t noisy_movies[i, t, x_shift - w: x_shift + w, y_shift - w: y_shift + w, 0] += 1 # Make it more robust by adding noise. # The idea is that if during inference, # the value of the pixel is not exactly one, # we need to train the network to be robust and still # consider it as a pixel belonging to a square. 
if np.random.randint(0, 2): noise_f = (-1)**np.random.randint(0, 2) noisy_movies[i, t, x_shift - w - 1: x_shift + w + 1, y_shift - w - 1: y_shift + w + 1, 0] += noise_f * 0.1 # Shift the ground truth by 1 x_shift = xstart + directionx * (t + 1) y_shift = ystart + directiony * (t + 1) shifted_movies[i, t, x_shift - w: x_shift + w, y_shift - w: y_shift + w, 0] += 1 # Cut to a 40x40 window noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::] shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::] noisy_movies[noisy_movies >= 1] = 1 shifted_movies[shifted_movies >= 1] = 1 return noisy_movies, shifted_movies # Train the network noisy_movies, shifted_movies = generate_movies(n_samples=1200) seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10, epochs=300, validation_split=0.05) # Testing the network on one movie # feed it with the first 7 positions and then # predict the new positions which = 1004 track = noisy_movies[which][:7, ::, ::, ::] for j in range(16): new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::]) new = new_pos[::, -1, ::, ::, ::] track = np.concatenate((track, new), axis=0) # And then compare the predictions # to the ground truth track2 = noisy_movies[which][::, ::, ::, ::] for i in range(15): fig = plt.figure(figsize=(10, 5)) ax = fig.add_subplot(121) if i >= 7: ax.text(1, 3, 'Predictions !', fontsize=20, color='w') else: ax.text(1, 3, 'Initial trajectory', fontsize=20) toplot = track[i, ::, ::, 0] plt.imshow(toplot) ax = fig.add_subplot(122) plt.text(1, 3, 'Ground truth', fontsize=20) toplot = track2[i, ::, ::, 0] if i >= 2: toplot = shifted_movies[which][i - 1, ::, ::, 0] plt.imshow(toplot) plt.savefig('%i_animate.png' % (i + 1)) Keras-2.2.4/examples/neural_style_transfer.py0000644000000000116100000002444613342055016021134 0ustar rooteng00000000000000'''Neural style transfer with Keras. 
Run the script with:
```
python neural_style_transfer.py path_to_your_base_image.jpg \
    path_to_your_reference.jpg prefix_for_results
```
e.g.:
```
python neural_style_transfer.py img/tuebingen.jpg \
    img/starry_night.jpg results/my_result
```
Optional parameters:
```
--iter, To specify the number of iterations \
    the style transfer takes place (Default is 10)
--content_weight, The weight given to the content loss (Default is 0.025)
--style_weight, The weight given to the style loss (Default is 1.0)
--tv_weight, The weight given to the total variation loss (Default is 1.0)
```

It is preferable to run this script on GPU, for speed.

Example result: https://twitter.com/fchollet/status/686631033085677568

# Details

Style transfer consists in generating an image
with the same "content" as a base image, but with the
"style" of a different picture (typically artistic).

This is achieved through the optimization of a loss function
that has 3 components: "style loss", "content loss",
and "total variation loss":

- The total variation loss imposes local spatial continuity between
the pixels of the combination image, giving it visual coherence.

- The style loss is where the deep learning comes in --that one is defined
using a deep convolutional neural network. Precisely, it consists in a sum of
L2 distances between the Gram matrices of the representations of
the combination image and the style reference image, extracted from
different layers of a convnet (trained on ImageNet). The general idea
is to capture color/texture information at different spatial
scales (fairly large scales --defined by the depth of the layer considered).

- The content loss is a L2 distance between the features of the base
image (extracted from a deep layer) and the features of the combination image,
keeping the generated image close enough to the original one.

# References - [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576) ''' from __future__ import print_function from keras.preprocessing.image import load_img, save_img, img_to_array import numpy as np from scipy.optimize import fmin_l_bfgs_b import time import argparse from keras.applications import vgg19 from keras import backend as K parser = argparse.ArgumentParser(description='Neural style transfer with Keras.') parser.add_argument('base_image_path', metavar='base', type=str, help='Path to the image to transform.') parser.add_argument('style_reference_image_path', metavar='ref', type=str, help='Path to the style reference image.') parser.add_argument('result_prefix', metavar='res_prefix', type=str, help='Prefix for the saved results.') parser.add_argument('--iter', type=int, default=10, required=False, help='Number of iterations to run.') parser.add_argument('--content_weight', type=float, default=0.025, required=False, help='Content weight.') parser.add_argument('--style_weight', type=float, default=1.0, required=False, help='Style weight.') parser.add_argument('--tv_weight', type=float, default=1.0, required=False, help='Total Variation weight.') args = parser.parse_args() base_image_path = args.base_image_path style_reference_image_path = args.style_reference_image_path result_prefix = args.result_prefix iterations = args.iter # these are the weights of the different loss components total_variation_weight = args.tv_weight style_weight = args.style_weight content_weight = args.content_weight # dimensions of the generated picture. 
width, height = load_img(base_image_path).size img_nrows = 400 img_ncols = int(width * img_nrows / height) # util function to open, resize and format pictures into appropriate tensors def preprocess_image(image_path): img = load_img(image_path, target_size=(img_nrows, img_ncols)) img = img_to_array(img) img = np.expand_dims(img, axis=0) img = vgg19.preprocess_input(img) return img # util function to convert a tensor into a valid image def deprocess_image(x): if K.image_data_format() == 'channels_first': x = x.reshape((3, img_nrows, img_ncols)) x = x.transpose((1, 2, 0)) else: x = x.reshape((img_nrows, img_ncols, 3)) # Remove zero-center by mean pixel x[:, :, 0] += 103.939 x[:, :, 1] += 116.779 x[:, :, 2] += 123.68 # 'BGR'->'RGB' x = x[:, :, ::-1] x = np.clip(x, 0, 255).astype('uint8') return x # get tensor representations of our images base_image = K.variable(preprocess_image(base_image_path)) style_reference_image = K.variable(preprocess_image(style_reference_image_path)) # this will contain our generated image if K.image_data_format() == 'channels_first': combination_image = K.placeholder((1, 3, img_nrows, img_ncols)) else: combination_image = K.placeholder((1, img_nrows, img_ncols, 3)) # combine the 3 images into a single Keras tensor input_tensor = K.concatenate([base_image, style_reference_image, combination_image], axis=0) # build the VGG16 network with our 3 images as input # the model will be loaded with pre-trained ImageNet weights model = vgg19.VGG19(input_tensor=input_tensor, weights='imagenet', include_top=False) print('Model loaded.') # get the symbolic outputs of each "key" layer (we gave them unique names). 
outputs_dict = {layer.name: layer.output for layer in model.layers}

# compute the neural style loss
# first we need to define 4 util functions


def gram_matrix(x):
    """Computes the gram matrix of an image tensor.

    The tensor is flattened to one row per channel and multiplied by
    its own transpose (feature-wise outer product).

    # Arguments
        x: 3D tensor holding the feature maps of a single image,
            laid out according to `K.image_data_format()`.

    # Returns
        The 2D tensor `dot(features, transpose(features))`.
    """
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        # move the channel axis to the front before flattening
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram


def style_loss(style, combination):
    """Computes the style loss between two sets of feature maps.

    The "style loss" is designed to maintain the style of the
    reference image in the generated image. It is based on the gram
    matrices (which capture style) of feature maps from the style
    reference image and from the generated image.

    # Arguments
        style: 3D tensor, feature maps of the style reference image.
        combination: 3D tensor, feature maps of the generated image.

    # Returns
        Scalar tensor: the sum of squared differences between the two
        gram matrices, divided by `4 * channels**2 * size**2`.
    """
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    # NOTE: the normalization uses the channel count and pixel count
    # of the generated picture, not those of the feature maps
    channels = 3
    size = img_nrows * img_ncols
    return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2))


def content_loss(base, combination):
    """Computes the content loss between two sets of feature maps.

    An auxiliary loss function designed to maintain the "content" of
    the base image in the generated image.

    # Arguments
        base: tensor, feature maps of the base image.
        combination: tensor, feature maps of the generated image.

    # Returns
        Scalar tensor: the sum of squared element-wise differences.
    """
    return K.sum(K.square(combination - base))


def total_variation_loss(x):
    """Computes the total variation loss of the generated image.

    The 3rd loss function, designed to keep the generated image
    locally coherent.

    # Arguments
        x: 4D image tensor with spatial size `(img_nrows, img_ncols)`,
            laid out according to `K.image_data_format()`.

    # Returns
        Scalar tensor: the sum over all positions of
        `(a + b) ** 1.25`, where `a` and `b` are the squared
        differences between each pixel and its neighbour along the
        row axis and the column axis respectively.
    """
    assert K.ndim(x) == 4
    if K.image_data_format() == 'channels_first':
        a = K.square(
            x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1])
        b = K.square(
            x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(
            x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
        b = K.square(
            x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))


# combine these loss functions into a single scalar
loss = K.variable(0.)
# content term: base image (batch index 0) against the generated
# image (batch index 2), both taken from the 'block5_conv2' output
layer_features = outputs_dict['block5_conv2']
base_image_features = layer_features[0, :, :, :]
combination_features = layer_features[2, :, :, :]
loss += content_weight * content_loss(base_image_features,
                                      combination_features)

# style term: style reference (batch index 1) against the generated
# image, with the style weight shared equally between the layers
feature_layers = ['block1_conv1', 'block2_conv1',
                  'block3_conv1', 'block4_conv1',
                  'block5_conv1']
per_layer_style_weight = style_weight / len(feature_layers)
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    sl = style_loss(layer_features[1, :, :, :],
                    layer_features[2, :, :, :])
    loss += per_layer_style_weight * sl

# smoothness term, computed on the generated image itself
loss += total_variation_weight * total_variation_loss(combination_image)

# get the gradients of the generated image wrt the loss
grads = K.gradients(loss, combination_image)

# the compiled function returns the loss first, then the gradient(s)
if isinstance(grads, (list, tuple)):
    outputs = [loss] + list(grads)
else:
    outputs = [loss, grads]

f_outputs = K.function([combination_image], outputs)


def eval_loss_and_grads(x):
    """Evaluates the loss and its gradients for one candidate image.

    # Arguments
        x: Numpy array with the pixel values of the generated image;
            it is reshaped to a batch of one in the layout given by
            `K.image_data_format()`.

    # Returns
        A tuple `(loss_value, grad_values)` where `grad_values` is a
        flat `float64` Numpy array.
    """
    if K.image_data_format() == 'channels_first':
        batch_shape = (1, 3, img_nrows, img_ncols)
    else:
        batch_shape = (1, img_nrows, img_ncols, 3)
    outs = f_outputs([x.reshape(batch_shape)])
    loss_value, grad_outs = outs[0], outs[1:]
    if len(grad_outs) == 1:
        stacked = grad_outs[0]
    else:
        stacked = np.array(grad_outs)
    grad_values = stacked.flatten().astype('float64')
    return loss_value, grad_values

# this Evaluator class makes it possible
# to compute loss and gradients in one pass
# while retrieving them via two separate functions,
# "loss" and "grads". This is done because scipy.optimize
# requires separate functions for loss and gradients,
# but computing them separately would be inefficient.
class Evaluator(object): def __init__(self): self.loss_value = None self.grads_values = None def loss(self, x): assert self.loss_value is None loss_value, grad_values = eval_loss_and_grads(x) self.loss_value = loss_value self.grad_values = grad_values return self.loss_value def grads(self, x): assert self.loss_value is not None grad_values = np.copy(self.grad_values) self.loss_value = None self.grad_values = None return grad_values evaluator = Evaluator() # run scipy-based optimization (L-BFGS) over the pixels of the generated image # so as to minimize the neural style loss x = preprocess_image(base_image_path) for i in range(iterations): print('Start of iteration', i) start_time = time.time() x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.grads, maxfun=20) print('Current loss value:', min_val) # save current generated image img = deprocess_image(x.copy()) fname = result_prefix + '_at_iteration_%d.png' % i save_img(fname, img) end_time = time.time() print('Image saved as', fname) print('Iteration %d completed in %ds' % (i, end_time - start_time)) Keras-2.2.4/setup.cfg0000644000000000116100000000011713355226624014151 0ustar rooteng00000000000000[metadata] description-file = README.md [egg_info] tag_build = tag_date = 0