Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/0000755000175000017500000000000013362335401021060 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/0000755000175000017500000000000013307306052022471 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/utils.py0000644000175000017500000003775613307306052024225 0ustar sinclairssinclairsimport numbers import numpy as np import theano import theano.tensor as T #: Tuple of ``int``-like types for ``isinstance`` checks. #: Specifically includes long integers and numpy integers. int_types = (numbers.Integral, np.integer) def floatX(arr): """Converts data to a numpy array of dtype ``theano.config.floatX``. Parameters ---------- arr : array_like The data to be converted. Returns ------- numpy ndarray The input array in the ``floatX`` dtype configured for Theano. If `arr` is an ndarray of correct dtype, it is returned as is. """ return np.asarray(arr, dtype=theano.config.floatX) def shared_empty(dim=2, dtype=None): """Creates empty Theano shared variable. Shortcut to create an empty Theano shared variable with the specified number of dimensions. Parameters ---------- dim : int, optional The number of dimensions for the empty variable, defaults to 2. dtype : a numpy data-type, optional The desired dtype for the variable. Defaults to the Theano ``floatX`` dtype. Returns ------- Theano shared variable An empty Theano shared variable of dtype ``dtype`` with `dim` dimensions. """ if dtype is None: dtype = theano.config.floatX shp = tuple([1] * dim) return theano.shared(np.zeros(shp, dtype=dtype)) def as_theano_expression(input): """Wrap as Theano expression. Wraps the given input as a Theano constant if it is not a valid Theano expression already. Useful to transparently handle numpy arrays and Python scalars, for example. Parameters ---------- input : number, numpy array or Theano expression Expression to be converted to a Theano constant. Returns ------- Theano symbolic constant Theano constant version of `input`. """ if isinstance(input, theano.gof.Variable): return input else: try: return theano.tensor.constant(input) except Exception as e: raise TypeError("Input of type %s is not a Theano expression and " "cannot be wrapped as a Theano constant (original " "exception: %s)" % (type(input), e)) def collect_shared_vars(expressions): """Returns all shared variables the given expression(s) depend on. Parameters ---------- expressions : Theano expression or iterable of Theano expressions The expressions to collect shared variables from. Returns ------- list of Theano shared variables All shared variables the given expression(s) depend on, in fixed order (as found by a left-recursive depth-first search). If some expressions are shared variables themselves, they are included in the result. """ # wrap single expression in list if isinstance(expressions, theano.Variable): expressions = [expressions] # return list of all shared variables return [v for v in theano.gof.graph.inputs(reversed(expressions)) if isinstance(v, theano.compile.SharedVariable)] def one_hot(x, m=None): """One-hot representation of integer vector. Given a vector of integers from 0 to m-1, returns a matrix with a one-hot representation, where each row corresponds to an element of x. Parameters ---------- x : integer vector The integer vector to convert to a one-hot representation. m : int, optional The number of different columns for the one-hot representation. This needs to be strictly greater than the maximum value of `x`. 
Defaults to ``max(x) + 1``. Returns ------- Theano tensor variable A Theano tensor variable of shape (``n``, `m`), where ``n`` is the length of `x`, with the one-hot representation of `x`. Notes ----- If your integer vector represents target class memberships, and you wish to compute the cross-entropy between predictions and the target class memberships, then there is no need to use this function, since the function :func:`lasagne.objectives.categorical_crossentropy()` can compute the cross-entropy from the integer vector directly. """ if m is None: m = T.cast(T.max(x) + 1, 'int32') return T.eye(m)[T.cast(x, 'int32')] def unique(l): """Filters duplicates of iterable. Create a new list from l with duplicate entries removed, while preserving the original order. Parameters ---------- l : iterable Input iterable to filter of duplicates. Returns ------- list A list of elements of `l` without duplicates and in the same order. """ new_list = [] seen = set() for el in l: if el not in seen: new_list.append(el) seen.add(el) return new_list def as_tuple(x, N, t=None): """ Coerce a value to a tuple of given length (and possibly given type). Parameters ---------- x : value or iterable N : integer length of the desired tuple t : type or tuple of type, optional required type or types for all elements Returns ------- tuple ``tuple(x)`` if `x` is iterable, ``(x,) * N`` otherwise. Raises ------ TypeError if `type` is given and `x` or any of its elements do not match it ValueError if `x` is iterable, but does not have exactly `N` elements """ try: X = tuple(x) except TypeError: X = (x,) * N if (t is not None) and not all(isinstance(v, t) for v in X): if t == int_types: expected_type = "int" # easier to understand elif isinstance(t, tuple): expected_type = " or ".join(tt.__name__ for tt in t) else: expected_type = t.__name__ raise TypeError("expected a single value or an iterable " "of {0}, got {1} instead".format(expected_type, x)) if len(X) != N: raise ValueError("expected a single value or an iterable " "with length {0}, got {1} instead".format(N, x)) return X def inspect_kwargs(func): """ Inspects a callable and returns a list of all optional keyword arguments. Parameters ---------- func : callable The callable to inspect Returns ------- kwargs : list of str Names of all arguments of `func` that have a default value, in order """ # We try the Python 3.x way first, then fall back to the Python 2.x way try: from inspect import signature except ImportError: # pragma: no cover from inspect import getargspec spec = getargspec(func) return spec.args[-len(spec.defaults):] if spec.defaults else [] else: # pragma: no cover params = signature(func).parameters return [p.name for p in params.values() if p.default is not p.empty] def compute_norms(array, norm_axes=None): """ Compute incoming weight vector norms. Parameters ---------- array : numpy array or Theano expression Weight or bias. norm_axes : sequence (list or tuple) The axes over which to compute the norm. This overrides the default norm axes defined for the number of dimensions in `array`. When this is not specified and `array` is a 2D array, this is set to `(0,)`. If `array` is a 3D, 4D or 5D array, it is set to a tuple listing all axes but axis 0. The former default is useful for working with dense layers, the latter is useful for 1D, 2D and 3D convolutional layers. Finally, in case `array` is a vector, `norm_axes` is set to an empty tuple, and this function will simply return the absolute value for each element. 
This is useful when the function is applied to all parameters of the network, including the bias, without distinction. (Optional) Returns ------- norms : 1D array or Theano vector (1D) 1D array or Theano vector of incoming weight/bias vector norms. Examples -------- >>> array = np.random.randn(100, 200) >>> norms = compute_norms(array) >>> norms.shape (200,) >>> norms = compute_norms(array, norm_axes=(1,)) >>> norms.shape (100,) """ # Check if supported type if not isinstance(array, theano.Variable) and \ not isinstance(array, np.ndarray): raise RuntimeError( "Unsupported type {}. " "Only theano variables and numpy arrays " "are supported".format(type(array)) ) # Compute default axes to sum over ndim = array.ndim if norm_axes is not None: sum_over = tuple(norm_axes) elif ndim == 1: # For Biases that are in 1d (e.g. b of DenseLayer) sum_over = () elif ndim == 2: # DenseLayer sum_over = (0,) elif ndim in [3, 4, 5]: # Conv{1,2,3}DLayer sum_over = tuple(range(1, ndim)) else: raise ValueError( "Unsupported tensor dimensionality {}. " "Must specify `norm_axes`".format(array.ndim) ) # Run numpy or Theano norm computation if isinstance(array, theano.Variable): # Apply theano version if it is a theano variable if len(sum_over) == 0: norms = T.abs_(array) # abs if we have nothing to sum over else: norms = T.sqrt(T.sum(array**2, axis=sum_over)) elif isinstance(array, np.ndarray): # Apply the numpy version if ndarray if len(sum_over) == 0: norms = abs(array) # abs if we have nothing to sum over else: norms = np.sqrt(np.sum(array**2, axis=sum_over)) return norms def create_param(spec, shape, name=None): """ Helper method to create Theano shared variables for layer parameters and to initialize them. Parameters ---------- spec : scalar number, numpy array, Theano expression, or callable Either of the following: * a scalar or a numpy array with the initial parameter values * a Theano expression or shared variable representing the parameters * a function or callable that takes the desired shape of the parameter array as its single argument and returns a numpy array, a Theano expression, or a shared variable representing the parameters. shape : iterable of int a tuple or other iterable of integers representing the desired shape of the parameter array. name : string, optional The name to give to the parameter variable. Ignored if `spec` is or returns a Theano expression or shared variable that already has a name. Returns ------- Theano shared variable or Theano expression A Theano shared variable or expression representing layer parameters. If a scalar or a numpy array was provided, a shared variable is initialized to contain this array. If a shared variable or expression was provided, it is simply returned. If a callable was provided, it is called, and its output is used to initialize a shared variable. Notes ----- This function is called by :meth:`Layer.add_param()` in the constructor of most :class:`Layer` subclasses. This enables those layers to support initialization with scalars, numpy arrays, existing Theano shared variables or expressions, and callables for generating initial parameter values, Theano expressions, or shared variables. """ import numbers # to check if argument is a number shape = tuple(shape) # convert to tuple if needed if any(d <= 0 for d in shape): raise ValueError(( "Cannot create param with a non-positive shape dimension. 
" "Tried to create param with shape=%r, name=%r") % (shape, name)) err_prefix = "cannot initialize parameter %s: " % name if callable(spec): spec = spec(shape) err_prefix += "the %s returned by the provided callable" else: err_prefix += "the provided %s" if isinstance(spec, numbers.Number) or isinstance(spec, np.generic) \ and spec.dtype.kind in 'biufc': spec = np.asarray(spec) if isinstance(spec, np.ndarray): if spec.shape != shape: raise ValueError("%s has shape %s, should be %s" % (err_prefix % "numpy array", spec.shape, shape)) # We assume parameter variables do not change shape after creation. # We can thus fix their broadcast pattern, to allow Theano to infer # broadcastable dimensions of expressions involving these parameters. bcast = tuple(s == 1 for s in shape) spec = theano.shared(spec, broadcastable=bcast) if isinstance(spec, theano.Variable): # We cannot check the shape here, Theano expressions (even shared # variables) do not have a fixed compile-time shape. We can check the # dimensionality though. if spec.ndim != len(shape): raise ValueError("%s has %d dimensions, should be %d" % (err_prefix % "Theano variable", spec.ndim, len(shape))) # We only assign a name if the user hasn't done so already. if not spec.name: spec.name = name return spec else: if "callable" in err_prefix: raise TypeError("%s is not a numpy array or a Theano expression" % (err_prefix % "value")) else: raise TypeError("%s is not a numpy array, a Theano expression, " "or a callable" % (err_prefix % "spec")) def unroll_scan(fn, sequences, outputs_info, non_sequences, n_steps, go_backwards=False): """ Helper function to unroll for loops. Can be used to unroll theano.scan. The parameter names are identical to theano.scan, please refer to here for more information. Note that this function does not support the truncate_gradient setting from theano.scan. Parameters ---------- fn : function Function that defines calculations at each step. sequences : TensorVariable or list of TensorVariables List of TensorVariable with sequence data. The function iterates over the first dimension of each TensorVariable. outputs_info : list of TensorVariables List of tensors specifying the initial values for each recurrent value. non_sequences: list of TensorVariables List of theano.shared variables that are used in the step function. n_steps: int Number of steps to unroll. go_backwards: bool If true the recursion starts at sequences[-1] and iterates backwards. Returns ------- List of TensorVariables. Each element in the list gives the recurrent values at each time step. """ if not isinstance(sequences, (list, tuple)): sequences = [sequences] # When backwards reverse the recursion direction counter = range(n_steps) if go_backwards: counter = counter[::-1] output = [] prev_vals = outputs_info for i in counter: step_input = [s[i] for s in sequences] + prev_vals + non_sequences out_ = fn(*step_input) # The returned values from step can be either a TensorVariable, # a list, or a tuple. Below, we force it to always be a list. if isinstance(out_, T.TensorVariable): out_ = [out_] if isinstance(out_, tuple): out_ = list(out_) output.append(out_) prev_vals = output[-1] # iterate over each scan output and convert it to same format as scan: # [[output11, output12,...output1n], # [output21, output22,...output2n],...] 
output_scan = [] for i in range(len(output[0])): l = map(lambda x: x[i], output) output_scan.append(T.stack(*l)) return output_scan Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/__init__.py0000644000175000017500000000162013307306052024601 0ustar sinclairssinclairs""" Tools to train neural nets in Theano """ try: install_instr = """ Please make sure you install a recent enough version of Theano. Note that a simple 'pip install theano' will usually give you a version that is too old for Lasagne. See the installation docs for more details: http://lasagne.readthedocs.org/en/latest/user/installation.html#theano""" import theano except ImportError: # pragma: no cover raise ImportError("Could not import Theano." + install_instr) else: try: import theano.tensor.signal.pool except ImportError: # pragma: no cover raise ImportError("Your Theano version is too old." + install_instr) del install_instr del theano from . import nonlinearities from . import init from . import layers from . import objectives from . import random from . import regularization from . import updates from . import utils __version__ = "0.2.dev1" Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/updates.py0000644000175000017500000007607613307306052024530 0ustar sinclairssinclairs""" Functions to generate Theano update dictionaries for training. The update functions implement different methods to control the learning rate for use with stochastic gradient descent. Update functions take a loss expression or a list of gradient expressions and a list of parameters as input and return an ordered dictionary of updates: .. autosummary:: :nosignatures: sgd momentum nesterov_momentum adagrad rmsprop adadelta adam adamax amsgrad Two functions can be used to further modify the updates to include momentum: .. autosummary:: :nosignatures: apply_momentum apply_nesterov_momentum Finally, we provide two helper functions to constrain the norm of tensors: .. autosummary:: :nosignatures: norm_constraint total_norm_constraint :func:`norm_constraint()` can be used to constrain the norm of parameters (as an alternative to weight decay), or for a form of gradient clipping. :func:`total_norm_constraint()` constrain the total norm of a list of tensors. This is often used when training recurrent neural networks. Examples -------- Using :func:`nesterov_momentum` to define an update dictionary for a toy example network: >>> import lasagne >>> import theano.tensor as T >>> import theano >>> from lasagne.nonlinearities import softmax >>> from lasagne.layers import InputLayer, DenseLayer, get_output >>> from lasagne.updates import nesterov_momentum >>> l_in = InputLayer((100, 20)) >>> l1 = DenseLayer(l_in, num_units=3, nonlinearity=softmax) >>> x = T.matrix('x') # shp: num_batch x num_features >>> y = T.ivector('y') # shp: num_batch >>> l_out = get_output(l1, x) >>> params = lasagne.layers.get_all_params(l1) >>> loss = T.mean(T.nnet.categorical_crossentropy(l_out, y)) >>> updates = nesterov_momentum(loss, params, learning_rate=1e-4, momentum=.9) >>> train_fn = theano.function([x, y], updates=updates) With :func:`apply_momentum` and :func:`apply_nesterov_momentum`, we can add momentum to optimization schemes that do not usually support this: >>> updates = lasagne.updates.rmsprop(loss, params, learning_rate=0.0001) >>> updates = lasagne.updates.apply_momentum(updates, params, momentum=0.9) All optimization schemes support symbolic variables for their hyperparameters, such as shared variables. 
This allows to vary hyperparameters during training without recompiling the training function. Note that the dtypes must match the dtypes of the network parameters, which follow Theano's ``floatX`` setting. In the following example, we use :func:`lasagne.utils.floatX` to ensure this: >>> eta = theano.shared(lasagne.utils.floatX(0.001)) >>> updates = lasagne.updates.adam(loss, params, learning_rate=eta) >>> train_fn = theano.function([x, y], updates=updates) >>> # we can now modify the learning rate at any time during training: >>> eta.set_value(lasagne.utils.floatX(eta.get_value() * 0.1)) """ from collections import OrderedDict import numpy as np import theano import theano.tensor as T from . import utils __all__ = [ "sgd", "apply_momentum", "momentum", "apply_nesterov_momentum", "nesterov_momentum", "adagrad", "rmsprop", "adadelta", "adam", "adamax", "amsgrad", "norm_constraint", "total_norm_constraint" ] def get_or_compute_grads(loss_or_grads, params): """Helper function returning a list of gradients Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to return the gradients for Returns ------- list of expressions If `loss_or_grads` is a list, it is assumed to be a list of gradients and returned as is, unless it does not match the length of `params`, in which case a `ValueError` is raised. Otherwise, `loss_or_grads` is assumed to be a cost expression and the function returns `theano.grad(loss_or_grads, params)`. Raises ------ ValueError If `loss_or_grads` is a list of a different length than `params`, or if any element of `params` is not a shared variable (while we could still compute its gradient, we can never update it and want to fail early). """ if any(not isinstance(p, theano.compile.SharedVariable) for p in params): raise ValueError("params must contain shared variables only. If it " "contains arbitrary parameter expressions, then " "lasagne.utils.collect_shared_vars() may help you.") if isinstance(loss_or_grads, list): if not len(loss_or_grads) == len(params): raise ValueError("Got %d gradient expressions for %d parameters" % (len(loss_or_grads), len(params))) return loss_or_grads else: return theano.grad(loss_or_grads, params) def sgd(loss_or_grads, params, learning_rate): """Stochastic Gradient Descent (SGD) updates Generates update expressions of the form: * ``param := param - learning_rate * gradient`` Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float or symbolic scalar The learning rate controlling the size of update steps Returns ------- OrderedDict A dictionary mapping each parameter to its update expression """ grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() for param, grad in zip(params, grads): updates[param] = param - learning_rate * grad return updates def apply_momentum(updates, params=None, momentum=0.9): """Returns a modified update dictionary including momentum Generates update expressions of the form: * ``velocity := momentum * velocity + updates[param] - param`` * ``param := param + velocity`` Parameters ---------- updates : OrderedDict A dictionary mapping parameters to update expressions params : iterable of shared variables, optional The variables to apply momentum to. 
If omitted, will apply momentum to all `updates.keys()`. momentum : float or symbolic scalar, optional The amount of momentum to apply. Higher momentum results in smoothing over more update steps. Defaults to 0.9. Returns ------- OrderedDict A copy of `updates` with momentum updates for all `params`. Notes ----- Higher momentum also results in larger update steps. To counter that, you can optionally scale your learning rate by `1 - momentum`. See Also -------- momentum : Shortcut applying momentum to SGD updates """ if params is None: params = updates.keys() updates = OrderedDict(updates) for param in params: value = param.get_value(borrow=True) velocity = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) x = momentum * velocity + updates[param] updates[velocity] = x - param updates[param] = x return updates def momentum(loss_or_grads, params, learning_rate, momentum=0.9): """Stochastic Gradient Descent (SGD) updates with momentum Generates update expressions of the form: * ``velocity := momentum * velocity - learning_rate * gradient`` * ``param := param + velocity`` Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float or symbolic scalar The learning rate controlling the size of update steps momentum : float or symbolic scalar, optional The amount of momentum to apply. Higher momentum results in smoothing over more update steps. Defaults to 0.9. Returns ------- OrderedDict A dictionary mapping each parameter to its update expression Notes ----- Higher momentum also results in larger update steps. To counter that, you can optionally scale your learning rate by `1 - momentum`. See Also -------- apply_momentum : Generic function applying momentum to updates nesterov_momentum : Nesterov's variant of SGD with momentum """ updates = sgd(loss_or_grads, params, learning_rate) return apply_momentum(updates, momentum=momentum) def apply_nesterov_momentum(updates, params=None, momentum=0.9): """Returns a modified update dictionary including Nesterov momentum Generates update expressions of the form: * ``velocity := momentum * velocity + updates[param] - param`` * ``param := param + momentum * velocity + updates[param] - param`` Parameters ---------- updates : OrderedDict A dictionary mapping parameters to update expressions params : iterable of shared variables, optional The variables to apply momentum to. If omitted, will apply momentum to all `updates.keys()`. momentum : float or symbolic scalar, optional The amount of momentum to apply. Higher momentum results in smoothing over more update steps. Defaults to 0.9. Returns ------- OrderedDict A copy of `updates` with momentum updates for all `params`. Notes ----- Higher momentum also results in larger update steps. To counter that, you can optionally scale your learning rate by `1 - momentum`. The classic formulation of Nesterov momentum (or Nesterov accelerated gradient) requires the gradient to be evaluated at the predicted next position in parameter space. Here, we use the formulation described at https://github.com/lisa-lab/pylearn2/pull/136#issuecomment-10381617, which allows the gradient to be evaluated at the current parameters. 
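Examples
--------
A minimal, self-contained sketch (the toy parameter and loss below are
purely illustrative): plain SGD updates are generated first and then
wrapped to add Nesterov momentum.

>>> import numpy as np
>>> import theano
>>> import theano.tensor as T
>>> from lasagne.updates import sgd, apply_nesterov_momentum
>>> w = theano.shared(np.zeros((3,), dtype=theano.config.floatX))
>>> loss = T.sum(w ** 2)
>>> updates = sgd(loss, [w], learning_rate=0.1)
>>> updates = apply_nesterov_momentum(updates, momentum=0.9)
>>> len(updates)  # one velocity variable is tracked alongside `w`
2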
See Also -------- nesterov_momentum : Shortcut applying Nesterov momentum to SGD updates """ if params is None: params = updates.keys() updates = OrderedDict(updates) for param in params: value = param.get_value(borrow=True) velocity = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) x = momentum * velocity + updates[param] - param updates[velocity] = x updates[param] = momentum * x + updates[param] return updates def nesterov_momentum(loss_or_grads, params, learning_rate, momentum=0.9): """Stochastic Gradient Descent (SGD) updates with Nesterov momentum Generates update expressions of the form: * ``velocity := momentum * velocity - learning_rate * gradient`` * ``param := param + momentum * velocity - learning_rate * gradient`` Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float or symbolic scalar The learning rate controlling the size of update steps momentum : float or symbolic scalar, optional The amount of momentum to apply. Higher momentum results in smoothing over more update steps. Defaults to 0.9. Returns ------- OrderedDict A dictionary mapping each parameter to its update expression Notes ----- Higher momentum also results in larger update steps. To counter that, you can optionally scale your learning rate by `1 - momentum`. The classic formulation of Nesterov momentum (or Nesterov accelerated gradient) requires the gradient to be evaluated at the predicted next position in parameter space. Here, we use the formulation described at https://github.com/lisa-lab/pylearn2/pull/136#issuecomment-10381617, which allows the gradient to be evaluated at the current parameters. See Also -------- apply_nesterov_momentum : Function applying momentum to updates """ updates = sgd(loss_or_grads, params, learning_rate) return apply_nesterov_momentum(updates, momentum=momentum) def adagrad(loss_or_grads, params, learning_rate=1.0, epsilon=1e-6): """Adagrad updates Scale learning rates by dividing with the square root of accumulated squared gradients. See [1]_ for further description. Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float or symbolic scalar The learning rate controlling the size of update steps epsilon : float or symbolic scalar Small value added for numerical stability Returns ------- OrderedDict A dictionary mapping each parameter to its update expression Notes ----- Using step size eta Adagrad calculates the learning rate for feature i at time step t as: .. math:: \\eta_{t,i} = \\frac{\\eta} {\\sqrt{\\sum^t_{t^\\prime} g^2_{t^\\prime,i}+\\epsilon}} g_{t,i} as such the learning rate is monotonically decreasing. Epsilon is not included in the typical formula, see [2]_. References ---------- .. [1] Duchi, J., Hazan, E., & Singer, Y. (2011): Adaptive subgradient methods for online learning and stochastic optimization. JMLR, 12:2121-2159. .. [2] Chris Dyer: Notes on AdaGrad. 
http://www.ark.cs.cmu.edu/cdyer/adagrad.pdf """ grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() for param, grad in zip(params, grads): value = param.get_value(borrow=True) accu = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) accu_new = accu + grad ** 2 updates[accu] = accu_new updates[param] = param - (learning_rate * grad / T.sqrt(accu_new + epsilon)) return updates def rmsprop(loss_or_grads, params, learning_rate=1.0, rho=0.9, epsilon=1e-6): """RMSProp updates Scale learning rates by dividing with the moving average of the root mean squared (RMS) gradients. See [1]_ for further description. Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float or symbolic scalar The learning rate controlling the size of update steps rho : float or symbolic scalar Gradient moving average decay factor epsilon : float or symbolic scalar Small value added for numerical stability Returns ------- OrderedDict A dictionary mapping each parameter to its update expression Notes ----- `rho` should be between 0 and 1. A value of `rho` close to 1 will decay the moving average slowly and a value close to 0 will decay the moving average fast. Using the step size :math:`\\eta` and a decay factor :math:`\\rho` the learning rate :math:`\\eta_t` is calculated as: .. math:: r_t &= \\rho r_{t-1} + (1-\\rho)*g^2\\\\ \\eta_t &= \\frac{\\eta}{\\sqrt{r_t + \\epsilon}} References ---------- .. [1] Tieleman, T. and Hinton, G. (2012): Neural Networks for Machine Learning, Lecture 6.5 - rmsprop. Coursera. http://www.youtube.com/watch?v=O3sxAc4hxZU (formula @5:20) """ grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() # Using theano constant to prevent upcasting of float32 one = T.constant(1) for param, grad in zip(params, grads): value = param.get_value(borrow=True) accu = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) accu_new = rho * accu + (one - rho) * grad ** 2 updates[accu] = accu_new updates[param] = param - (learning_rate * grad / T.sqrt(accu_new + epsilon)) return updates def adadelta(loss_or_grads, params, learning_rate=1.0, rho=0.95, epsilon=1e-6): """ Adadelta updates Scale learning rates by the ratio of accumulated gradients to accumulated updates, see [1]_ and notes for further description. Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float or symbolic scalar The learning rate controlling the size of update steps rho : float or symbolic scalar Squared gradient moving average decay factor epsilon : float or symbolic scalar Small value added for numerical stability Returns ------- OrderedDict A dictionary mapping each parameter to its update expression Notes ----- rho should be between 0 and 1. A value of rho close to 1 will decay the moving average slowly and a value close to 0 will decay the moving average fast. rho = 0.95 and epsilon=1e-6 are suggested in the paper and reported to work for multiple datasets (MNIST, speech). In the paper, no learning rate is considered (so learning_rate=1.0). Probably best to keep it at this value. 
epsilon is important for the very first update (so the numerator does not become 0). Using the step size eta and a decay factor rho the learning rate is calculated as: .. math:: r_t &= \\rho r_{t-1} + (1-\\rho)*g^2\\\\ \\eta_t &= \\eta \\frac{\\sqrt{s_{t-1} + \\epsilon}} {\sqrt{r_t + \epsilon}}\\\\ s_t &= \\rho s_{t-1} + (1-\\rho)*(\\eta_t*g)^2 References ---------- .. [1] Zeiler, M. D. (2012): ADADELTA: An Adaptive Learning Rate Method. arXiv Preprint arXiv:1212.5701. """ grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() # Using theano constant to prevent upcasting of float32 one = T.constant(1) for param, grad in zip(params, grads): value = param.get_value(borrow=True) # accu: accumulate gradient magnitudes accu = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) # delta_accu: accumulate update magnitudes (recursively!) delta_accu = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) # update accu (as in rmsprop) accu_new = rho * accu + (one - rho) * grad ** 2 updates[accu] = accu_new # compute parameter update, using the 'old' delta_accu update = (grad * T.sqrt(delta_accu + epsilon) / T.sqrt(accu_new + epsilon)) updates[param] = param - learning_rate * update # update delta_accu (as accu, but accumulating updates) delta_accu_new = rho * delta_accu + (one - rho) * update ** 2 updates[delta_accu] = delta_accu_new return updates def adam(loss_or_grads, params, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8): """Adam updates Adam updates implemented as in [1]_. Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float or symbolic scalar Learning rate beta1 : float or symbolic scalar Exponential decay rate for the first moment estimates. beta2 : float or symbolic scalar Exponential decay rate for the second moment estimates. epsilon : float or symbolic scalar Constant for numerical stability. Returns ------- OrderedDict A dictionary mapping each parameter to its update expression Notes ----- The paper [1]_ includes an additional hyperparameter lambda. This is only needed to prove convergence of the algorithm and has no practical use (personal communication with the authors), it is therefore omitted here. References ---------- .. [1] Kingma, Diederik, and Jimmy Ba (2014): Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980. """ all_grads = get_or_compute_grads(loss_or_grads, params) t_prev = theano.shared(utils.floatX(0.)) updates = OrderedDict() # Using theano constant to prevent upcasting of float32 one = T.constant(1) t = t_prev + 1 a_t = learning_rate*T.sqrt(one-beta2**t)/(one-beta1**t) for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) m_t = beta1*m_prev + (one-beta1)*g_t v_t = beta2*v_prev + (one-beta2)*g_t**2 step = a_t*m_t/(T.sqrt(v_t) + epsilon) updates[m_prev] = m_t updates[v_prev] = v_t updates[param] = param - step updates[t_prev] = t return updates def adamax(loss_or_grads, params, learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8): """Adamax updates Adamax updates implemented as in [1]_. 
This is a variant of of the Adam algorithm based on the infinity norm. Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float or symbolic scalar Learning rate beta1 : float or symbolic scalar Exponential decay rate for the first moment estimates. beta2 : float or symbolic scalar Exponential decay rate for the weighted infinity norm estimates. epsilon : float or symbolic scalar Constant for numerical stability. Returns ------- OrderedDict A dictionary mapping each parameter to its update expression References ---------- .. [1] Kingma, Diederik, and Jimmy Ba (2014): Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980. """ all_grads = get_or_compute_grads(loss_or_grads, params) t_prev = theano.shared(utils.floatX(0.)) updates = OrderedDict() # Using theano constant to prevent upcasting of float32 one = T.constant(1) t = t_prev + 1 a_t = learning_rate/(one-beta1**t) for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) u_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) m_t = beta1*m_prev + (one-beta1)*g_t u_t = T.maximum(beta2*u_prev, abs(g_t)) step = a_t*m_t/(u_t + epsilon) updates[m_prev] = m_t updates[u_prev] = u_t updates[param] = param - step updates[t_prev] = t return updates def amsgrad(loss_or_grads, params, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8): """AMSGrad updates AMSGrad updates implemented as in [1]_. Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float or symbolic scalar Learning rate beta1 : float or symbolic scalar Exponential decay rate for the first moment estimates. beta2 : float or symbolic scalar Exponential decay rate for the second moment estimates. epsilon : float or symbolic scalar Constant for numerical stability. Returns ------- OrderedDict A dictionary mapping each parameter to its update expression References ---------- .. 
[1] https://openreview.net/forum?id=ryQu7f-RZ """ all_grads = get_or_compute_grads(loss_or_grads, params) t_prev = theano.shared(utils.floatX(0.)) updates = OrderedDict() # Using theano constant to prevent upcasting of float32 one = T.constant(1) t = t_prev + 1 a_t = learning_rate*T.sqrt(one-beta2**t)/(one-beta1**t) for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) v_hat_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) m_t = beta1*m_prev + (one-beta1)*g_t v_t = beta2*v_prev + (one-beta2)*g_t**2 v_hat_t = T.maximum(v_hat_prev, v_t) step = a_t*m_t/(T.sqrt(v_hat_t) + epsilon) updates[m_prev] = m_t updates[v_prev] = v_t updates[v_hat_prev] = v_hat_t updates[param] = param - step updates[t_prev] = t return updates def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): """Max weight norm constraints and gradient clipping This takes a TensorVariable and rescales it so that incoming weight norms are below a specified constraint value. Vectors violating the constraint are rescaled so that they are within the allowed range. Parameters ---------- tensor_var : TensorVariable Theano expression for update, gradient, or other quantity. max_norm : scalar This value sets the maximum allowed value of any norm in `tensor_var`. norm_axes : sequence (list or tuple) The axes over which to compute the norm. This overrides the default norm axes defined for the number of dimensions in `tensor_var`. When this is not specified and `tensor_var` is a matrix (2D), this is set to `(0,)`. If `tensor_var` is a 3D, 4D or 5D tensor, it is set to a tuple listing all axes but axis 0. The former default is useful for working with dense layers, the latter is useful for 1D, 2D and 3D convolutional layers. (Optional) epsilon : scalar, optional Value used to prevent numerical instability when dividing by very small or zero norms. Returns ------- TensorVariable Input `tensor_var` with rescaling applied to weight vectors that violate the specified constraints. Examples -------- >>> param = theano.shared( ... np.random.randn(100, 200).astype(theano.config.floatX)) >>> update = param + 100 >>> update = norm_constraint(update, 10) >>> func = theano.function([], [], updates=[(param, update)]) >>> # Apply constrained update >>> _ = func() >>> from lasagne.utils import compute_norms >>> norms = compute_norms(param.get_value()) >>> np.isclose(np.max(norms), 10) True Notes ----- When `norm_axes` is not specified, the axes over which the norm is computed depend on the dimensionality of the input variable. If it is 2D, it is assumed to come from a dense layer, and the norm is computed over axis 0. If it is 3D, 4D or 5D, it is assumed to come from a convolutional layer and the norm is computed over all trailing axes beyond axis 0. For other uses, you should explicitly specify the axes over which to compute the norm using `norm_axes`. """ ndim = tensor_var.ndim if norm_axes is not None: sum_over = tuple(norm_axes) elif ndim == 2: # DenseLayer sum_over = (0,) elif ndim in [3, 4, 5]: # Conv{1,2,3}DLayer sum_over = tuple(range(1, ndim)) else: raise ValueError( "Unsupported tensor dimensionality {}." 
"Must specify `norm_axes`".format(ndim) ) dtype = np.dtype(theano.config.floatX).type norms = T.sqrt(T.sum(T.sqr(tensor_var), axis=sum_over, keepdims=True)) target_norms = T.clip(norms, 0, dtype(max_norm)) constrained_output = \ (tensor_var * (target_norms / (dtype(epsilon) + norms))) return constrained_output def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False): """Rescales a list of tensors based on their combined norm If the combined norm of the input tensors exceeds the threshold then all tensors are rescaled such that the combined norm is equal to the threshold. Scaling the norms of the gradients is often used when training recurrent neural networks [1]_. Parameters ---------- tensor_vars : List of TensorVariables. Tensors to be rescaled. max_norm : float Threshold value for total norm. epsilon : scalar, optional Value used to prevent numerical instability when dividing by very small or zero norms. return_norm : bool If true the total norm is also returned. Returns ------- tensor_vars_scaled : list of TensorVariables The scaled tensor variables. norm : Theano scalar The combined norms of the input variables prior to rescaling, only returned if ``return_norms=True``. Examples -------- >>> from lasagne.layers import InputLayer, DenseLayer >>> import lasagne >>> from lasagne.updates import sgd, total_norm_constraint >>> x = T.matrix() >>> y = T.ivector() >>> l_in = InputLayer((5, 10)) >>> l1 = DenseLayer(l_in, num_units=7, nonlinearity=T.nnet.softmax) >>> output = lasagne.layers.get_output(l1, x) >>> cost = T.mean(T.nnet.categorical_crossentropy(output, y)) >>> all_params = lasagne.layers.get_all_params(l1) >>> all_grads = T.grad(cost, all_params) >>> scaled_grads = total_norm_constraint(all_grads, 5) >>> updates = sgd(scaled_grads, all_params, learning_rate=0.1) Notes ----- The total norm can be used to monitor training. References ---------- .. [1] Sutskever, I., Vinyals, O., & Le, Q. V. (2014): Sequence to sequence learning with neural networks. In Advances in Neural Information Processing Systems (pp. 3104-3112). """ norm = T.sqrt(sum(T.sum(tensor**2) for tensor in tensor_vars)) dtype = np.dtype(theano.config.floatX).type target_norm = T.clip(norm, 0, dtype(max_norm)) multiplier = target_norm / (dtype(epsilon) + norm) tensor_vars_scaled = [step*multiplier for step in tensor_vars] if return_norm: return tensor_vars_scaled, norm else: return tensor_vars_scaled Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/random.py0000644000175000017500000000246013307306052024325 0ustar sinclairssinclairs""" A module with a package-wide random number generator, used for weight initialization and seeding noise layers. This can be replaced by a :class:`numpy.random.RandomState` instance with a particular seed to facilitate reproducibility. Note: When using cuDNN, the backward passes of convolutional and max-pooling layers will introduce additional nondeterminism (for performance reasons). For 2D convolutions, you can enforce a deterministic backward pass implementation via the Theano flags ``dnn.conv.algo_bwd_filter=deterministic`` and ``dnn.conv.algo_bwd_data=deterministic``. Alternatively, you can disable cuDNN completely with ``dnn.enabled=False``. """ import numpy as np _rng = np.random def get_rng(): """Get the package-level random number generator. 
Returns ------- :class:`numpy.random.RandomState` instance The :class:`numpy.random.RandomState` instance passed to the most recent call of :func:`set_rng`, or ``numpy.random`` if :func:`set_rng` has never been called. """ return _rng def set_rng(new_rng): """Set the package-level random number generator. Parameters ---------- new_rng : ``numpy.random`` or a :class:`numpy.random.RandomState` instance The random number generator to use. """ global _rng _rng = new_rng Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/init.py0000644000175000017500000002664413307306052024022 0ustar sinclairssinclairs""" Functions to create initializers for parameter variables. Examples -------- >>> from lasagne.layers import DenseLayer >>> from lasagne.init import Constant, GlorotUniform >>> l1 = DenseLayer((100,20), num_units=50, ... W=GlorotUniform('relu'), b=Constant(0.0)) """ import numpy as np from .utils import floatX from .random import get_rng class Initializer(object): """Base class for parameter tensor initializers. The :class:`Initializer` class represents a weight initializer used to initialize weight parameters in a neural network layer. It should be subclassed when implementing new types of weight initializers. """ def __call__(self, shape): """ Makes :class:`Initializer` instances callable like a function, invoking their :meth:`sample()` method. """ return self.sample(shape) def sample(self, shape): """ Sample should return a theano.tensor of size shape and data type theano.config.floatX. Parameters ----------- shape : tuple or int Integer or tuple specifying the size of the returned matrix. returns : theano.tensor Matrix of size shape and dtype theano.config.floatX. """ raise NotImplementedError() class Normal(Initializer): """Sample initial weights from the Gaussian distribution. Initial weight parameters are sampled from N(mean, std). Parameters ---------- std : float Std of initial parameters. mean : float Mean of initial parameters. """ def __init__(self, std=0.01, mean=0.0): self.std = std self.mean = mean def sample(self, shape): return floatX(get_rng().normal(self.mean, self.std, size=shape)) class Uniform(Initializer): """Sample initial weights from the uniform distribution. Parameters are sampled from U(a, b). Parameters ---------- range : float or tuple When std is None then range determines a, b. If range is a float the weights are sampled from U(-range, range). If range is a tuple the weights are sampled from U(range[0], range[1]). std : float or None If std is a float then the weights are sampled from U(mean - np.sqrt(3) * std, mean + np.sqrt(3) * std). mean : float see std for description. """ def __init__(self, range=0.01, std=None, mean=0.0): if std is not None: a = mean - np.sqrt(3) * std b = mean + np.sqrt(3) * std else: try: a, b = range # range is a tuple except TypeError: a, b = -range, range # range is a number self.range = (a, b) def sample(self, shape): return floatX(get_rng().uniform( low=self.range[0], high=self.range[1], size=shape)) class Glorot(Initializer): """Glorot weight initialization. This is also known as Xavier initialization [1]_. Parameters ---------- initializer : lasagne.init.Initializer Initializer used to sample the weights, must accept `std` in its constructor to sample from a distribution with a given standard deviation. gain : float or 'relu' Scaling factor for the weights. 
Set this to ``1.0`` for linear and sigmoid units, to 'relu' or ``sqrt(2)`` for rectified linear units, and to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units with leakiness ``alpha``. Other transfer functions may need different factors. c01b : bool For a :class:`lasagne.layers.cuda_convnet.Conv2DCCLayer` constructed with ``dimshuffle=False``, `c01b` must be set to ``True`` to compute the correct fan-in and fan-out. References ---------- .. [1] Xavier Glorot and Yoshua Bengio (2010): Understanding the difficulty of training deep feedforward neural networks. International conference on artificial intelligence and statistics. Notes ----- For a :class:`DenseLayer `, if ``gain='relu'`` and ``initializer=Uniform``, the weights are initialized as .. math:: a &= \\sqrt{\\frac{12}{fan_{in}+fan_{out}}}\\\\ W &\sim U[-a, a] If ``gain=1`` and ``initializer=Normal``, the weights are initialized as .. math:: \\sigma &= \\sqrt{\\frac{2}{fan_{in}+fan_{out}}}\\\\ W &\sim N(0, \\sigma) See Also -------- GlorotNormal : Shortcut with Gaussian initializer. GlorotUniform : Shortcut with uniform initializer. """ def __init__(self, initializer, gain=1.0, c01b=False): if gain == 'relu': gain = np.sqrt(2) self.initializer = initializer self.gain = gain self.c01b = c01b def sample(self, shape): if self.c01b: if len(shape) != 4: raise RuntimeError( "If c01b is True, only shapes of length 4 are accepted") n1, n2 = shape[0], shape[3] receptive_field_size = shape[1] * shape[2] else: if len(shape) < 2: raise RuntimeError( "This initializer only works with shapes of length >= 2") n1, n2 = shape[:2] receptive_field_size = np.prod(shape[2:]) std = self.gain * np.sqrt(2.0 / ((n1 + n2) * receptive_field_size)) return self.initializer(std=std).sample(shape) class GlorotNormal(Glorot): """Glorot with weights sampled from the Normal distribution. See :class:`Glorot` for a description of the parameters. """ def __init__(self, gain=1.0, c01b=False): super(GlorotNormal, self).__init__(Normal, gain, c01b) class GlorotUniform(Glorot): """Glorot with weights sampled from the Uniform distribution. See :class:`Glorot` for a description of the parameters. """ def __init__(self, gain=1.0, c01b=False): super(GlorotUniform, self).__init__(Uniform, gain, c01b) class He(Initializer): """He weight initialization. Weights are initialized with a standard deviation of :math:`\\sigma = gain \\sqrt{\\frac{1}{fan_{in}}}` [1]_. Parameters ---------- initializer : lasagne.init.Initializer Initializer used to sample the weights, must accept `std` in its constructor to sample from a distribution with a given standard deviation. gain : float or 'relu' Scaling factor for the weights. Set this to ``1.0`` for linear and sigmoid units, to 'relu' or ``sqrt(2)`` for rectified linear units, and to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units with leakiness ``alpha``. Other transfer functions may need different factors. c01b : bool For a :class:`lasagne.layers.cuda_convnet.Conv2DCCLayer` constructed with ``dimshuffle=False``, `c01b` must be set to ``True`` to compute the correct fan-in and fan-out. References ---------- .. [1] Kaiming He et al. (2015): Delving deep into rectifiers: Surpassing human-level performance on imagenet classification. arXiv preprint arXiv:1502.01852. See Also ---------- HeNormal : Shortcut with Gaussian initializer. HeUniform : Shortcut with uniform initializer. 
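Examples
--------
A brief sketch of drawing an initial weight matrix directly via
:meth:`sample`; the shape is chosen arbitrarily for illustration:

>>> from lasagne.init import HeNormal
>>> W = HeNormal(gain='relu').sample((128, 64))
>>> W.shape
(128, 64)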
""" def __init__(self, initializer, gain=1.0, c01b=False): if gain == 'relu': gain = np.sqrt(2) self.initializer = initializer self.gain = gain self.c01b = c01b def sample(self, shape): if self.c01b: if len(shape) != 4: raise RuntimeError( "If c01b is True, only shapes of length 4 are accepted") fan_in = np.prod(shape[:3]) else: if len(shape) == 2: fan_in = shape[0] elif len(shape) > 2: fan_in = np.prod(shape[1:]) else: raise RuntimeError( "This initializer only works with shapes of length >= 2") std = self.gain * np.sqrt(1.0 / fan_in) return self.initializer(std=std).sample(shape) class HeNormal(He): """He initializer with weights sampled from the Normal distribution. See :class:`He` for a description of the parameters. """ def __init__(self, gain=1.0, c01b=False): super(HeNormal, self).__init__(Normal, gain, c01b) class HeUniform(He): """He initializer with weights sampled from the Uniform distribution. See :class:`He` for a description of the parameters. """ def __init__(self, gain=1.0, c01b=False): super(HeUniform, self).__init__(Uniform, gain, c01b) class Constant(Initializer): """Initialize weights with constant value. Parameters ---------- val : float Constant value for weights. """ def __init__(self, val=0.0): self.val = val def sample(self, shape): return floatX(np.ones(shape) * self.val) class Sparse(Initializer): """Initialize weights as sparse matrix. Parameters ---------- sparsity : float Exact fraction of non-zero values per column. Larger values give less sparsity. std : float Non-zero weights are sampled from N(0, std). """ def __init__(self, sparsity=0.1, std=0.01): self.sparsity = sparsity self.std = std def sample(self, shape): if len(shape) != 2: raise RuntimeError( "sparse initializer only works with shapes of length 2") w = floatX(np.zeros(shape)) n_inputs, n_outputs = shape size = int(self.sparsity * n_inputs) # fraction of number of inputs for k in range(n_outputs): indices = np.arange(n_inputs) get_rng().shuffle(indices) indices = indices[:size] values = floatX(get_rng().normal(0.0, self.std, size=size)) w[indices, k] = values return w class Orthogonal(Initializer): """Intialize weights as Orthogonal matrix. Orthogonal matrix initialization [1]_. For n-dimensional shapes where n > 2, the n-1 trailing axes are flattened. For convolutional layers, this corresponds to the fan-in, so this makes the initialization usable for both dense and convolutional layers. Parameters ---------- gain : float or 'relu' Scaling factor for the weights. Set this to ``1.0`` for linear and sigmoid units, to 'relu' or ``sqrt(2)`` for rectified linear units, and to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units with leakiness ``alpha``. Other transfer functions may need different factors. References ---------- .. [1] Saxe, Andrew M., James L. McClelland, and Surya Ganguli. "Exact solutions to the nonlinear dynamics of learning in deep linear neural networks." arXiv preprint arXiv:1312.6120 (2013). 
""" def __init__(self, gain=1.0): if gain == 'relu': gain = np.sqrt(2) self.gain = gain def sample(self, shape): if len(shape) < 2: raise RuntimeError("Only shapes of length 2 or more are " "supported.") flat_shape = (shape[0], np.prod(shape[1:])) a = get_rng().normal(0.0, 1.0, flat_shape) u, _, v = np.linalg.svd(a, full_matrices=False) # pick the one with the correct shape q = u if u.shape == flat_shape else v q = q.reshape(shape) return floatX(self.gain * q) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/nonlinearities.py0000644000175000017500000002622013307306052026070 0ustar sinclairssinclairs# -*- coding: utf-8 -*- """ Non-linear activation functions for artificial neurons. """ import theano.tensor # sigmoid def sigmoid(x): """Sigmoid activation function :math:`\\varphi(x) = \\frac{1}{1 + e^{-x}}` Parameters ---------- x : float32 The activation (the summed, weighted input of a neuron). Returns ------- float32 in [0, 1] The output of the sigmoid function applied to the activation. """ return theano.tensor.nnet.sigmoid(x) # softmax (row-wise) def softmax(x): """Softmax activation function :math:`\\varphi(\\mathbf{x})_j = \\frac{e^{\mathbf{x}_j}}{\sum_{k=1}^K e^{\mathbf{x}_k}}` where :math:`K` is the total number of neurons in the layer. This activation function gets applied row-wise. Parameters ---------- x : float32 The activation (the summed, weighted input of a neuron). Returns ------- float32 where the sum of the row is 1 and each single value is in [0, 1] The output of the softmax function applied to the activation. """ return theano.tensor.nnet.softmax(x) # tanh def tanh(x): """Tanh activation function :math:`\\varphi(x) = \\tanh(x)` Parameters ---------- x : float32 The activation (the summed, weighted input of a neuron). Returns ------- float32 in [-1, 1] The output of the tanh function applied to the activation. """ return theano.tensor.tanh(x) # scaled tanh class ScaledTanH(object): """Scaled tanh :math:`\\varphi(x) = \\tanh(\\alpha \\cdot x) \\cdot \\beta` This is a modified tanh function which allows to rescale both the input and the output of the activation. Scaling the input down will result in decreasing the maximum slope of the tanh and as a result it will be in the linear regime in a larger interval of the input space. Scaling the input up will increase the maximum slope of the tanh and thus bring it closer to a step function. Scaling the output changes the output interval to :math:`[-\\beta,\\beta]`. Parameters ---------- scale_in : float32 The scale parameter :math:`\\alpha` for the input scale_out : float32 The scale parameter :math:`\\beta` for the output Methods ------- __call__(x) Apply the scaled tanh function to the activation `x`. Examples -------- In contrast to other activation functions in this module, this is a class that needs to be instantiated to obtain a callable: >>> from lasagne.layers import InputLayer, DenseLayer >>> l_in = InputLayer((None, 100)) >>> from lasagne.nonlinearities import ScaledTanH >>> scaled_tanh = ScaledTanH(scale_in=0.5, scale_out=2.27) >>> l1 = DenseLayer(l_in, num_units=200, nonlinearity=scaled_tanh) Notes ----- LeCun et al. (in [1]_, Section 4.4) suggest ``scale_in=2./3`` and ``scale_out=1.7159``, which has :math:`\\varphi(\\pm 1) = \\pm 1`, maximum second derivative at 1, and an effective gain close to 1. 
By carefully matching :math:`\\alpha` and :math:`\\beta`, the nonlinearity can also be tuned to preserve the mean and variance of its input: * ``scale_in=0.5``, ``scale_out=2.4``: If the input is a random normal variable, the output will have zero mean and unit variance. * ``scale_in=1``, ``scale_out=1.6``: Same property, but with a smaller linear regime in input space. * ``scale_in=0.5``, ``scale_out=2.27``: If the input is a uniform normal variable, the output will have zero mean and unit variance. * ``scale_in=1``, ``scale_out=1.48``: Same property, but with a smaller linear regime in input space. References ---------- .. [1] LeCun, Yann A., et al. (1998): Efficient BackProp, http://link.springer.com/chapter/10.1007/3-540-49430-8_2, http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf .. [2] Masci, Jonathan, et al. (2011): Stacked Convolutional Auto-Encoders for Hierarchical Feature Extraction, http://link.springer.com/chapter/10.1007/978-3-642-21735-7_7, http://people.idsia.ch/~ciresan/data/icann2011.pdf """ def __init__(self, scale_in=1, scale_out=1): self.scale_in = scale_in self.scale_out = scale_out def __call__(self, x): return theano.tensor.tanh(x * self.scale_in) * self.scale_out ScaledTanh = ScaledTanH # alias with alternative capitalization # rectify def rectify(x): """Rectify activation function :math:`\\varphi(x) = \\max(0, x)` Parameters ---------- x : float32 The activation (the summed, weighted input of a neuron). Returns ------- float32 The output of the rectify function applied to the activation. """ return theano.tensor.nnet.relu(x) # leaky rectify class LeakyRectify(object): """Leaky rectifier :math:`\\varphi(x) = (x > 0)? x : \\alpha \\cdot x` The leaky rectifier was introduced in [1]_. Compared to the standard rectifier :func:`rectify`, it has a nonzero gradient for negative input, which often helps convergence. Parameters ---------- leakiness : float Slope for negative input, usually between 0 and 1. A leakiness of 0 will lead to the standard rectifier, a leakiness of 1 will lead to a linear activation function, and any value in between will give a leaky rectifier. Methods ------- __call__(x) Apply the leaky rectify function to the activation `x`. Examples -------- In contrast to other activation functions in this module, this is a class that needs to be instantiated to obtain a callable: >>> from lasagne.layers import InputLayer, DenseLayer >>> l_in = InputLayer((None, 100)) >>> from lasagne.nonlinearities import LeakyRectify >>> custom_rectify = LeakyRectify(0.1) >>> l1 = DenseLayer(l_in, num_units=200, nonlinearity=custom_rectify) Alternatively, you can use the provided instance for leakiness=0.01: >>> from lasagne.nonlinearities import leaky_rectify >>> l2 = DenseLayer(l_in, num_units=200, nonlinearity=leaky_rectify) Or the one for a high leakiness of 1/3: >>> from lasagne.nonlinearities import very_leaky_rectify >>> l3 = DenseLayer(l_in, num_units=200, nonlinearity=very_leaky_rectify) See Also -------- leaky_rectify: Instance with default leakiness of 0.01, as in [1]_. very_leaky_rectify: Instance with high leakiness of 1/3, as in [2]_. References ---------- .. [1] Maas et al. (2013): Rectifier Nonlinearities Improve Neural Network Acoustic Models, http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf .. 
[2] Graham, Benjamin (2014): Spatially-sparse convolutional neural networks, http://arxiv.org/abs/1409.6070 """ def __init__(self, leakiness=0.01): self.leakiness = leakiness def __call__(self, x): return theano.tensor.nnet.relu(x, self.leakiness) leaky_rectify = LeakyRectify() # shortcut with default leakiness leaky_rectify.__doc__ = """leaky_rectify(x) Instance of :class:`LeakyRectify` with leakiness :math:`\\alpha=0.01` """ very_leaky_rectify = LeakyRectify(1./3) # shortcut with high leakiness very_leaky_rectify.__doc__ = """very_leaky_rectify(x) Instance of :class:`LeakyRectify` with leakiness :math:`\\alpha=1/3` """ # elu def elu(x): """Exponential Linear Unit :math:`\\varphi(x) = (x > 0) ? x : e^x - 1` The Exponential Linear Unit (ELU) was introduced in [1]_. Compared to the linear rectifier :func:`rectify`, it has a mean activation closer to zero and nonzero gradient for negative input, which can help convergence. Compared to the leaky rectifier :class:`LeakyRectify`, it saturates for highly negative inputs. Parameters ---------- x : float32 The activation (the summed, weighted input of a neuron). Returns ------- float32 The output of the exponential linear unit for the activation. Notes ----- In [1]_, an additional parameter :math:`\\alpha` controls the (negative) saturation value for negative inputs, but is set to 1 for all experiments. It is omitted here. References ---------- .. [1] Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter (2015): Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs), http://arxiv.org/abs/1511.07289 """ return theano.tensor.switch(x > 0, x, theano.tensor.expm1(x)) # selu class SELU(object): """ Scaled Exponential Linear Unit :math:`\\varphi(x)=\\lambda \\left[(x>0) ? x : \\alpha(e^x-1)\\right]` The Scaled Exponential Linear Unit (SELU) was introduced in [1]_ as an activation function that allows the construction of self-normalizing neural networks. Parameters ---------- scale : float32 The scale parameter :math:`\\lambda` for scaling all output. scale_neg : float32 The scale parameter :math:`\\alpha` for scaling output for nonpositive argument values. Methods ------- __call__(x) Apply the SELU function to the activation `x`. Examples -------- In contrast to other activation functions in this module, this is a class that needs to be instantiated to obtain a callable: >>> from lasagne.layers import InputLayer, DenseLayer >>> l_in = InputLayer((None, 100)) >>> from lasagne.nonlinearities import SELU >>> selu = SELU(2, 3) >>> l1 = DenseLayer(l_in, num_units=200, nonlinearity=selu) See Also -------- selu: Instance with :math:`\\alpha\\approx1.6733,\\lambda\\approx1.0507` as used in [1]_. References ---------- .. [1] Günter Klambauer et al. (2017): Self-Normalizing Neural Networks, https://arxiv.org/abs/1706.02515 """ def __init__(self, scale=1, scale_neg=1): self.scale = scale self.scale_neg = scale_neg def __call__(self, x): return self.scale * theano.tensor.switch( x > 0.0, x, self.scale_neg * (theano.tensor.expm1(x))) selu = SELU(scale=1.0507009873554804934193349852946, scale_neg=1.6732632423543772848170429916717) selu.__doc__ = """selu(x) Instance of :class:`SELU` with :math:`\\alpha\\approx 1.6733, \\lambda\\approx 1.0507` This has a stable and attracting fixed point of :math:`\\mu=0`, :math:`\\sigma=1` under the assumptions of the original paper on self-normalizing neural networks.
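Examples
--------
A usage sketch analogous to the :class:`SELU` example above, passing the
provided instance directly as a layer nonlinearity (layer sizes are
arbitrary):

>>> from lasagne.layers import InputLayer, DenseLayer
>>> from lasagne.nonlinearities import selu
>>> l_in = InputLayer((None, 100))
>>> l1 = DenseLayer(l_in, num_units=200, nonlinearity=selu)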
""" # softplus def softplus(x): """Softplus activation function :math:`\\varphi(x) = \\log(1 + e^x)` Parameters ---------- x : float32 The activation (the summed, weighted input of a neuron). Returns ------- float32 The output of the softplus function applied to the activation. """ return theano.tensor.nnet.softplus(x) # linear def linear(x): """Linear activation function :math:`\\varphi(x) = x` Parameters ---------- x : float32 The activation (the summed, weighted input of a neuron). Returns ------- float32 The output of the identity applied to the activation. """ return x identity = linear Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/conftest.py0000644000175000017500000000055213307306052024672 0ustar sinclairssinclairsignore_test_paths = [ "*/layers/corrmm.py", "*/layers/cuda_convnet.py", "*/layers/dnn.py", ] def pytest_ignore_collect(path, config): """Ignore paths that would otherwise be collceted by the doctest plugin and lead to ImportError due to missing dependencies. """ return any(path.fnmatch(ignore) for ignore in ignore_test_paths) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/theano_extensions/0000755000175000017500000000000013307306052026226 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/theano_extensions/__init__.py0000644000175000017500000000000013307306052030325 0ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/theano_extensions/padding.py0000644000175000017500000000260213307306052030206 0ustar sinclairssinclairs""" Padding """ import theano.tensor as T from ..utils import int_types def pad(x, width, val=0, batch_ndim=1): """ Pad a tensor with a constant value. Parameters ---------- x : tensor width : int, iterable of int, or iterable of tuple Padding width. If an int, pads each axis symmetrically with the same amount in the beginning and end. If an iterable of int, defines the symmetric padding width separately for each axis. If an iterable of tuples of two ints, defines a seperate padding width for each beginning and end of each axis. val : float The constant value used for padding batch_ndim : integer Dimensions before the value will not be padded. 
""" input_shape = x.shape input_ndim = x.ndim output_shape = list(input_shape) indices = [slice(None) for _ in output_shape] if isinstance(width, int_types): widths = [width] * (input_ndim - batch_ndim) else: widths = width for k, w in enumerate(widths): try: l, r = w except TypeError: l = r = w output_shape[k + batch_ndim] += l + r indices[k + batch_ndim] = slice(l, l + input_shape[k + batch_ndim]) if val: out = T.ones(output_shape) * val else: out = T.zeros(output_shape) return T.set_subtensor(out[tuple(indices)], x) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/theano_extensions/conv.py0000644000175000017500000002422713307306052027554 0ustar sinclairssinclairs""" Alternative convolution implementations for Theano """ import numpy as np import theano.tensor as T from ..utils import int_types # 1D convolutions def conv1d_sc(input, filters, image_shape=None, filter_shape=None, border_mode='valid', subsample=(1,), filter_flip=True): """ using conv2d with a single input channel """ if border_mode not in ('valid', 0, (0,)): raise RuntimeError("Unsupported border_mode for conv1d_sc: " "%s" % border_mode) if image_shape is None: image_shape_sc = None else: # (b, c, i0) to (b, 1, c, i0) image_shape_sc = (image_shape[0], 1, image_shape[1], image_shape[2]) if filter_shape is None: filter_shape_sc = None else: filter_shape_sc = (filter_shape[0], 1, filter_shape[1], filter_shape[2]) input_sc = input.dimshuffle(0, 'x', 1, 2) # We need to flip the channels dimension because it will be convolved over. filters_sc = filters.dimshuffle(0, 'x', 1, 2)[:, :, ::-1, :] conved = T.nnet.conv2d(input_sc, filters_sc, image_shape_sc, filter_shape_sc, subsample=(1, subsample[0]), filter_flip=filter_flip) return conved[:, :, 0, :] # drop the unused dimension def conv1d_mc0(input, filters, image_shape=None, filter_shape=None, border_mode='valid', subsample=(1,), filter_flip=True, num_groups=1): """ using conv2d with width == 1 """ if image_shape is None: image_shape_mc0 = None else: # (b, c, i0) to (b, c, 1, i0) image_shape_mc0 = (image_shape[0], image_shape[1], 1, image_shape[2]) if filter_shape is None: filter_shape_mc0 = None else: filter_shape_mc0 = (filter_shape[0], filter_shape[1], 1, filter_shape[2]) if isinstance(border_mode, tuple): (border_mode,) = border_mode if isinstance(border_mode, int_types): border_mode = (0, border_mode) input_mc0 = input.dimshuffle(0, 1, 'x', 2) filters_mc0 = filters.dimshuffle(0, 1, 'x', 2) extra_kwargs = {'num_groups': num_groups} if num_groups > 1 else {} conved = T.nnet.conv2d( input_mc0, filters_mc0, image_shape_mc0, filter_shape_mc0, subsample=(1, subsample[0]), border_mode=border_mode, filter_flip=filter_flip, **extra_kwargs) return conved[:, :, 0, :] # drop the unused dimension def conv1d_mc1(input, filters, image_shape=None, filter_shape=None, border_mode='valid', subsample=(1,), filter_flip=True, num_groups=1): """ using conv2d with height == 1 """ if image_shape is None: image_shape_mc1 = None else: # (b, c, i0) to (b, c, i0, 1) image_shape_mc1 = (image_shape[0], image_shape[1], image_shape[2], 1) if filter_shape is None: filter_shape_mc1 = None else: filter_shape_mc1 = (filter_shape[0], filter_shape[1], filter_shape[2], 1) if isinstance(border_mode, tuple): (border_mode,) = border_mode if isinstance(border_mode, int_types): border_mode = (border_mode, 0) input_mc1 = input.dimshuffle(0, 1, 2, 'x') filters_mc1 = filters.dimshuffle(0, 1, 2, 'x') extra_kwargs = {'num_groups': num_groups} if num_groups > 1 else {} conved = T.nnet.conv2d( input_mc1, filters_mc1, 
image_shape_mc1, filter_shape_mc1, subsample=(subsample[0], 1), border_mode=border_mode, filter_flip=filter_flip, **extra_kwargs) return conved[:, :, :, 0] # drop the unused dimension def conv1d_unstrided(input, filters, image_shape, filter_shape, border_mode='valid', subsample=(1,), filter_flip=True, implementation=conv1d_sc): """ perform a strided 1D convolution by reshaping input and filters so that the stride becomes 1. This function requires that the filter length is a multiple of the stride. It also truncates the input to have a length that is a multiple of the stride. """ batch_size, num_input_channels, input_length = image_shape num_filters, num_input_channels_, filter_length = filter_shape stride = subsample[0] if filter_length % stride > 0: raise RuntimeError("Filter length (%d) is not a multiple of the " "stride (%d)" % (filter_length, stride)) # TODO: test if this works for border_mode='full' if border_mode not in ('valid', 0, (0,)): raise RuntimeError("Unsupported border_mode for conv1d_unstrided: " "%s" % border_mode) num_steps = filter_length // stride # input sizes need to be multiples of the strides, # truncate to correct sizes. truncated_length = (input_length // stride) * stride input_truncated = input[:, :, :truncated_length] r_input_shape = (batch_size, num_input_channels, truncated_length // stride, stride) r_input = input_truncated.reshape(r_input_shape) # fold strides into the feature maps dimension (input) r_input_folded_shape = (batch_size, num_input_channels * stride, truncated_length // stride) r_input_folded = r_input.dimshuffle( 0, 1, 3, 2).reshape(r_input_folded_shape) r_filter_shape = (num_filters, num_input_channels, num_steps, stride) r_filters_flipped = filters[:, :, ::-1].reshape(r_filter_shape) # fold strides into the feature maps dimension (filters) r_filter_folded_shape = (num_filters, num_input_channels * stride, num_steps) r_filters_flipped_folded = r_filters_flipped.dimshuffle( 0, 1, 3, 2).reshape(r_filter_folded_shape) r_filters_folded = r_filters_flipped_folded[:, :, ::-1] # unflip return implementation(r_input_folded, r_filters_folded, r_input_folded_shape, r_filter_folded_shape, border_mode, subsample=(1,), filter_flip=filter_flip) def conv1d_sd(input, filters, image_shape, filter_shape, border_mode='valid', subsample=(1,), filter_flip=True): """ using a single dot product """ if border_mode not in ('valid', 0, (0,)): raise RuntimeError("Unsupported border_mode for conv1d_sd: " "%s" % border_mode) batch_size, num_input_channels, input_length = image_shape num_filters, num_input_channels_, filter_length = filter_shape stride = subsample[0] if filter_length % stride > 0: raise RuntimeError("Filter length (%d) is not a multiple of the " "stride (%d)" % (filter_length, stride)) num_steps = filter_length // stride output_length = (input_length - filter_length + stride) // stride # pad the input so all the shifted dot products fit inside. # shape is (b, c, l) padded_length = ((input_length // filter_length) * filter_length + (num_steps - 1) * stride) # at this point, it is possible that the padded_length is SMALLER than the # input size. so then we have to truncate first. 
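    # (Illustrative numbers, not from the original source: with input_length=10,
    # filter_length=6 and stride=2, num_steps is 3 and padded_length is
    # (10 // 6) * 6 + 2 * 2 = 10; with input_length=11 instead, padded_length is
    # still 10 and thus smaller than the input, hence the truncation below.)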
truncated_length = min(input_length, padded_length) input_truncated = input[:, :, :truncated_length] input_padded_shape = (batch_size, num_input_channels, padded_length) input_padded = T.zeros(input_padded_shape) input_padded = T.set_subtensor(input_padded[:, :, :truncated_length], input_truncated) inputs = [] for num in range(num_steps): shift = num * stride length = (padded_length - shift) // filter_length r_input_shape = (batch_size, num_input_channels, length, filter_length) r_input = input_padded[ :, :, shift:length * filter_length + shift].reshape(r_input_shape) inputs.append(r_input) inputs_stacked = T.stack(*inputs) # shape is (n, b, c, w, f) filters_flipped = filters[:, :, ::-1] if filter_flip else filters r_conved = T.tensordot(inputs_stacked, filters_flipped, np.asarray([[2, 4], [1, 2]])) # resulting shape is (n, b, w, n_filters) # output needs to be (b, n_filters, w * n) r_conved = r_conved.dimshuffle(1, 3, 2, 0) # (b, n_filters, w, n) conved = r_conved.reshape((r_conved.shape[0], r_conved.shape[1], r_conved.shape[2] * r_conved.shape[3])) # result is (b, n_f, l) # remove padding return conved[:, :, :output_length] def conv1d_md(input, filters, image_shape, filter_shape, border_mode='valid', subsample=(1,), filter_flip=True): """ using multiple dot products """ if border_mode not in ('valid', 0, (0,)): raise RuntimeError("Unsupported border_mode for conv1d_md: " "%s" % border_mode) batch_size, num_input_channels, input_length = image_shape num_filters, num_input_channels_, filter_length = filter_shape stride = subsample[0] if filter_length % stride > 0: raise RuntimeError("Filter length (%d) is not a multiple of the " "stride (%d)" % (filter_length, stride)) num_steps = filter_length // stride output_length = (input_length - filter_length + stride) // stride output_shape = (batch_size, num_filters, output_length) filters_flipped = filters[:, :, ::-1] if filter_flip else filters conved = T.zeros(output_shape) for num in range(num_steps): shift = num * stride length = (input_length - shift) // filter_length if length == 0: # we can safely skip this product, it doesn't contribute to the # final convolution. continue r_input_shape = (batch_size, num_input_channels, length, filter_length) r_input = input[ :, :, shift:length * filter_length + shift].reshape(r_input_shape) # shape (b, l, n_filters) r_conved = T.tensordot(r_input, filters_flipped, np.asarray([[1, 3], [1, 2]])) r_conved = r_conved.dimshuffle(0, 2, 1) # shape is (b, n_filters, l) conved = T.set_subtensor(conved[:, :, num::num_steps], r_conved) return conved # TODO: conv1d_md_channelslast? # 2D convolutions # TODO Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/regularization.py0000644000175000017500000001240013307306052026077 0ustar sinclairssinclairs""" Functions to apply regularization to the weights in a network. We provide functions to calculate the L1 and L2 penalty. Penalty functions take a tensor as input and calculate the penalty contribution from that tensor: .. autosummary:: :nosignatures: l1 l2 A helper function can be used to apply a penalty function to a tensor or a list of tensors: .. autosummary:: :nosignatures: apply_penalty Finally we provide two helper functions for applying a penalty function to the parameters in a layer or the parameters in a group of layers: .. 
autosummary:: :nosignatures: regularize_layer_params_weighted regularize_network_params Examples -------- >>> import lasagne >>> import theano.tensor as T >>> import theano >>> from lasagne.nonlinearities import softmax >>> from lasagne.layers import InputLayer, DenseLayer, get_output >>> from lasagne.regularization import regularize_layer_params_weighted, l2, l1 >>> from lasagne.regularization import regularize_layer_params >>> layer_in = InputLayer((100, 20)) >>> layer1 = DenseLayer(layer_in, num_units=3) >>> layer2 = DenseLayer(layer1, num_units=5, nonlinearity=softmax) >>> x = T.matrix('x') # shp: num_batch x num_features >>> y = T.ivector('y') # shp: num_batch >>> l_out = get_output(layer2, x) >>> loss = T.mean(T.nnet.categorical_crossentropy(l_out, y)) >>> layers = {layer1: 0.1, layer2: 0.5} >>> l2_penalty = regularize_layer_params_weighted(layers, l2) >>> l1_penalty = regularize_layer_params(layer2, l1) * 1e-4 >>> loss = loss + l2_penalty + l1_penalty """ import theano.tensor as T from .layers import Layer, get_all_params def l1(x): """Computes the L1 norm of a tensor Parameters ---------- x : Theano tensor Returns ------- Theano scalar l1 norm (sum of absolute values of elements) """ return T.sum(abs(x)) def l2(x): """Computes the squared L2 norm of a tensor Parameters ---------- x : Theano tensor Returns ------- Theano scalar squared l2 norm (sum of squared values of elements) """ return T.sum(x**2) def apply_penalty(tensor_or_tensors, penalty, **kwargs): """ Computes the total cost for applying a specified penalty to a tensor or group of tensors. Parameters ---------- tensor_or_tensors : Theano tensor or list of tensors penalty : callable **kwargs keyword arguments passed to penalty. Returns ------- Theano scalar a scalar expression for the total penalty cost """ try: return sum(penalty(x, **kwargs) for x in tensor_or_tensors) except (TypeError, ValueError): return penalty(tensor_or_tensors, **kwargs) def regularize_layer_params(layer, penalty, tags={'regularizable': True}, **kwargs): """ Computes a regularization cost by applying a penalty to the parameters of a layer or group of layers. Parameters ---------- layer : a :class:`Layer` instances or list of layers. penalty : callable tags: dict Tag specifications which filter the parameters of the layer or layers. By default, only parameters with the `regularizable` tag are included. **kwargs keyword arguments passed to penalty. Returns ------- Theano scalar a scalar expression for the cost """ layers = [layer, ] if isinstance(layer, Layer) else layer all_params = [] for layer in layers: all_params += layer.get_params(**tags) return apply_penalty(all_params, penalty, **kwargs) def regularize_layer_params_weighted(layers, penalty, tags={'regularizable': True}, **kwargs): """ Computes a regularization cost by applying a penalty to the parameters of a layer or group of layers, weighted by a coefficient for each layer. Parameters ---------- layers : dict A mapping from :class:`Layer` instances to coefficients. penalty : callable tags: dict Tag specifications which filter the parameters of the layer or layers. By default, only parameters with the `regularizable` tag are included. **kwargs keyword arguments passed to penalty. 
Returns ------- Theano scalar a scalar expression for the cost """ return sum(coeff * apply_penalty(layer.get_params(**tags), penalty, **kwargs) for layer, coeff in layers.items() ) def regularize_network_params(layer, penalty, tags={'regularizable': True}, **kwargs): """ Computes a regularization cost by applying a penalty to the parameters of all layers in a network. Parameters ---------- layer : a :class:`Layer` instance. Parameters of this layer and all layers below it will be penalized. penalty : callable tags: dict Tag specifications which filter the parameters of the layer or layers. By default, only parameters with the `regularizable` tag are included. **kwargs keyword arguments passed to penalty. Returns ------- Theano scalar a scalar expression for the cost """ return apply_penalty(get_all_params(layer, **tags), penalty, **kwargs) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/0000755000175000017500000000000013307306052023633 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/test_updates.py0000644000175000017500000001775013307306052026723 0ustar sinclairssinclairsimport pytest import numpy as np import theano import theano.tensor as T import lasagne PCT_TOLERANCE = 1E-5 class TestUpdateFunctions(object): # These tests compare results on a toy problem to values # calculated by the torch.optim package, using this script: # https://gist.github.com/ebenolson/931e879ed38f257253d2 torch_values = {'sgd': [0.81707280688755, 0.6648326359915, 0.5386151140949], 'momentum': [0.6848486952183, 0.44803321781003, 0.27431190123502], 'nesterov_momentum': [0.67466543592725, 0.44108468114241, 0.2769002108997], 'adagrad': [0.55373120047759, 0.55373120041518, 0.55373120039438], 'rmsprop': [0.83205403985348, 0.83205322744821, 0.83205295664444], 'adadelta': [0.95453237704725, 0.9545237471374, 0.95452214847397], 'adam': [0.90034972009036, 0.90034967993061, 0.90034966654402], 'adamax': [0.90211749000754, 0.90211748762402, 0.90211748682951], 'amsgrad': [0.90034979581833, 0.90034979581833, 0.90034979581833], } def f(self, X): return ([0.1, 0.2, 0.3] * X**2).sum() @pytest.mark.parametrize('method, kwargs', [ ['sgd', {'learning_rate': 0.1}], ['momentum', {'learning_rate': 0.1, 'momentum': 0.5}], ['nesterov_momentum', {'learning_rate': 0.1, 'momentum': 0.5}], ['adagrad', {'learning_rate': 0.1}], ['rmsprop', {'learning_rate': 0.01}], ['adadelta', {}], ['adam', {'learning_rate': 0.01}], ['adamax', {'learning_rate': 0.01}], ['amsgrad', {'learning_rate': 0.01}], ]) def test_updates(self, method, kwargs): A = theano.shared(lasagne.utils.floatX([1, 1, 1])) B = theano.shared(lasagne.utils.floatX([1, 1, 1])) update_func = getattr(lasagne.updates, method) updates = update_func(self.f(A) + self.f(B), [A, B], **kwargs) do_update = theano.function([], [], updates=updates) for _ in range(10): do_update() assert np.allclose(A.get_value(), B.get_value()) assert np.allclose(A.get_value(), self.torch_values[method]) @pytest.mark.parametrize('method, kwargs', [ ['sgd', {'learning_rate': 0.1}], ['momentum', {'learning_rate': 0.1, 'momentum': 0.5}], ['nesterov_momentum', {'learning_rate': 0.1, 'momentum': 0.5}], ['adagrad', {'learning_rate': 0.1, 'epsilon': 1e-6}], ['rmsprop', {'learning_rate': 0.01, 'rho': 0.9, 'epsilon': 1e-6}], ['adadelta', {'learning_rate': 0.01, 'rho': 0.9, 'epsilon': 1e-6}], ['adam', {'learning_rate': 0.01, 'beta1': 0.9, 'beta2': 0.999, 'epsilon': 1e-8}], ['adamax', {'learning_rate': 0.01, 'beta1': 0.9, 'beta2': 0.999, 'epsilon': 1e-8}], ['amsgrad', 
{'learning_rate': 0.01, 'beta1': 0.9, 'beta2': 0.999, 'epsilon': 1e-8}], ]) def test_update_returntype(self, method, kwargs): '''Checks whether lasagne.updates handles float32 inputs correctly''' floatX_ = theano.config.floatX theano.config.floatX = 'float32' try: A = theano.shared(lasagne.utils.floatX([1, 1, 1])) B = theano.shared(lasagne.utils.floatX([1, 1, 1])) update_func = getattr(lasagne.updates, method) updates = update_func(self.f(A) + self.f(B), [A, B], **kwargs) assert all(v.dtype == 'float32' for v in updates) # Checking for float32 arguments for param in kwargs: kwargs[param] = np.float32(kwargs[param]) updates = update_func(self.f(A) + self.f(B), [A, B], **kwargs) assert all(v.dtype == 'float32' for v in updates) finally: theano.config.floatX = floatX_ def test_get_or_compute_grads(): from lasagne.updates import get_or_compute_grads A = theano.shared(1) B = theano.shared(1) loss = A + B grads = get_or_compute_grads(loss, [A, B]) assert get_or_compute_grads(grads, [A, B]) is grads with pytest.raises(ValueError): get_or_compute_grads(grads, [A]) C = T.scalar() with pytest.raises(ValueError): get_or_compute_grads(A + C, [A, C]) @pytest.mark.parametrize('ndim', [2, 3]) def test_norm_constraint(ndim): import numpy as np import theano from lasagne.updates import norm_constraint from lasagne.utils import compute_norms max_norm = 0.01 param = theano.shared( np.random.randn(*((25,) * ndim)).astype(theano.config.floatX) ) update = norm_constraint(param, max_norm) apply_update = theano.function([], [], updates=[(param, update)]) apply_update() assert param.dtype == update.dtype assert (np.max(compute_norms(param.get_value())) <= max_norm * (1 + PCT_TOLERANCE)) def test_norm_constraint_norm_axes(): import numpy as np import theano from lasagne.updates import norm_constraint from lasagne.utils import compute_norms max_norm = 0.01 norm_axes = (0, 2) param = theano.shared( np.random.randn(10, 20, 30, 40).astype(theano.config.floatX) ) update = norm_constraint(param, max_norm, norm_axes=norm_axes) apply_update = theano.function([], [], updates=[(param, update)]) apply_update() assert param.dtype == update.dtype assert (np.max(compute_norms(param.get_value(), norm_axes=norm_axes)) <= max_norm*(1 + PCT_TOLERANCE)) def test_norm_constraint_dim6_raises(): import numpy as np import theano from lasagne.updates import norm_constraint max_norm = 0.01 param = theano.shared( np.random.randn(1, 2, 3, 4, 5, 6).astype(theano.config.floatX) ) with pytest.raises(ValueError) as excinfo: norm_constraint(param, max_norm) assert "Unsupported tensor dimensionality" in str(excinfo.value) def test_total_norm_constraint(): import numpy as np import theano import theano.tensor as T from lasagne.updates import total_norm_constraint x1 = T.scalar() x2 = T.matrix() threshold = 5.0 tensors1 = total_norm_constraint([x1, x2], threshold, return_norm=False) tensors2, norm = total_norm_constraint([x1, x2], threshold, return_norm=True) f1 = theano.function([x1, x2], [tensors1[0], tensors1[1]]) f2 = theano.function([x1, x2], [tensors2[0], tensors2[1], norm]) x_test = np.arange(1+9, dtype='float32') x1_test = x_test[-1] x2_test = x_test[:9].reshape((3, 3)) x1_out1, x2_out1 = f1(x1_test, x2_test) x1_out2, x2_out2, norm = f2(x1_test, x2_test) np.testing.assert_array_almost_equal(x1_out1, x1_out2) np.testing.assert_array_almost_equal(x2_out1, x2_out2) x_out = [float(x1_out1)] + list(x2_out1.flatten()) np.testing.assert_array_almost_equal(np.linalg.norm(x_test), norm) np.testing.assert_array_almost_equal(np.linalg.norm(x_out), 
threshold) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/test_utils.py0000644000175000017500000002430513307306052026410 0ustar sinclairssinclairsfrom mock import Mock import pytest import numpy as np import theano import theano.tensor as T def test_int_types(): from lasagne.utils import int_types assert isinstance(42, int_types) assert isinstance(np.int8(42), int_types) assert isinstance(np.int16(42), int_types) assert isinstance(np.int32(42), int_types) assert isinstance(np.int64(42), int_types) assert isinstance(np.empty(42).shape[0], int_types) assert isinstance(np.prod(np.empty(42).shape), int_types) try: assert isinstance(long(42), int_types) except NameError: pass def test_shared_empty(): from lasagne.utils import shared_empty X = shared_empty(3) assert (np.zeros((1, 1, 1)) == X.eval()).all() def test_as_theano_expression_fails(): from lasagne.utils import as_theano_expression with pytest.raises(TypeError): as_theano_expression({}) def test_collect_shared_vars(): from lasagne.utils import collect_shared_vars as collect x, y, z = (theano.shared(0, name=n) for n in 'xyz') # collecting must not change the order assert collect([x, y, z]) == [x, y, z] # duplicates should be eliminated assert collect([x, y, x, y, y, z]) == [x, y, z] # ensure we have left-recursive depth-first search assert collect((x + y) + z) == [x, y, z] assert collect(x + (y + z)) == [x, y, z] # complex expressions and constants should not be included assert collect([x**2, y * z * np.ones(10), x + T.matrix()]) == [x, y, z] # the result can even be empty assert collect([T.matrix() + T.matrix(), T.log(T.matrix())]) == [] def test_one_hot(): from lasagne.utils import one_hot a = np.random.randint(0, 10, 20) b = np.zeros((a.size, a.max()+1)) b[np.arange(a.size), a] = 1 result = one_hot(a).eval() assert (result == b).all() def test_as_tuple_fails(): from lasagne.utils import as_tuple, int_types with pytest.raises(ValueError) as exc: as_tuple([1, 2, 3], 4) assert "length 4" in exc.value.args[0] with pytest.raises(TypeError) as exc: as_tuple('asdf', 4, int) assert "of int," in exc.value.args[0] with pytest.raises(TypeError) as exc: as_tuple('asdf', 4, (int, float)) assert "of int or float," in exc.value.args[0] with pytest.raises(TypeError) as exc: as_tuple('asdf', 4, int_types) assert "of int," in exc.value.args[0] def test_inspect_kwargs(): from lasagne.utils import inspect_kwargs assert inspect_kwargs(inspect_kwargs) == [] assert inspect_kwargs(lambda a, b, c=42, bar='asdf': 0) == ['c', 'bar'] assert inspect_kwargs(lambda x, *args, **kwargs: 0) == [] def test_compute_norms(): from lasagne.utils import compute_norms # Test numpy version of compute_norms array = np.random.randn(10, 20, 30, 40).astype(theano.config.floatX) norms = compute_norms(array) assert array.dtype == norms.dtype assert norms.shape[0] == array.shape[0] # Test theano version of compute_norms t_array = theano.shared(array) t_norms = compute_norms(t_array) # Check if they do not differ much assert np.allclose(t_norms.eval(), norms) def test_compute_norms_axes(): from lasagne.utils import compute_norms # Test numpy versions of compute norms with axes array = np.random.randn(10, 20, 30, 40).astype(theano.config.floatX) norms = compute_norms(array, norm_axes=(0, 2)) assert array.dtype == norms.dtype assert norms.shape == (array.shape[1], array.shape[3]) # Test theano version of compute_norms t_array = theano.shared(array) t_norms = compute_norms(t_array, norm_axes=(0, 2)) # Check if they do not differ much assert np.allclose(t_norms.eval(), 
norms) def test_compute_norms_ndim1(): from lasagne.utils import compute_norms # Test numpy versions of compute norms with axes array = np.random.randn(10, ).astype(theano.config.floatX) norms = compute_norms(array) assert array.dtype == norms.dtype assert norms.shape == array.shape # Check if they do not differ much assert np.allclose(norms, abs(array)) # Test theano version of compute_norms t_array = theano.shared(array) t_norms = compute_norms(t_array) # Check if they do not differ much assert np.allclose(t_norms.eval(), norms) def test_compute_norms_type_raises(): from lasagne.utils import compute_norms array = [[1, 2], [3, 4]] with pytest.raises(RuntimeError) as excinfo: compute_norms(array) assert ("Unsupported type") in str(excinfo.value) def test_compute_norms_ndim6_raises(): from lasagne.utils import compute_norms array = np.random.randn(1, 2, 3, 4, 5, 6).astype(theano.config.floatX) with pytest.raises(ValueError) as excinfo: compute_norms(array) assert "Unsupported tensor dimensionality" in str(excinfo.value) def test_create_param_bad_callable_raises(): from lasagne.utils import create_param with pytest.raises(TypeError): create_param(lambda x: {}, (1, 2, 3)) with pytest.raises(ValueError): create_param(lambda x: np.array(1), (1, 2, 3)) def test_create_param_bad_spec_raises(): from lasagne.utils import create_param with pytest.raises(TypeError): create_param({}, (1, 2, 3)) def test_create_param_accepts_iterable_shape(): from lasagne.utils import create_param factory = np.empty create_param(factory, [2, 3]) create_param(factory, (x for x in [2, 3])) def test_create_param_numpy_bad_shape_raises_error(): from lasagne.utils import create_param param = np.array([[1, 2, 3], [4, 5, 6]]) with pytest.raises(ValueError): create_param(param, (3, 2)) def test_create_param_numpy_returns_shared(): from lasagne.utils import create_param param = np.array([[1, 2, 3], [4, 5, 6]]) result = create_param(param, (2, 3)) assert (result.get_value() == param).all() assert isinstance(result, type(theano.shared(param))) assert (result.get_value() == param).all() def test_create_param_number_returns_same(): from lasagne.utils import create_param param = 1 result = create_param(param, ()) assert result.get_value() == param def test_create_param_numpy_generic_returns_same(): from lasagne.utils import create_param param = np.int_(2) result = create_param(param, ()) assert result.get_value() == param def test_create_param_shared_returns_same(): from lasagne.utils import create_param param = theano.shared(np.array([[1, 2, 3], [4, 5, 6]])) result = create_param(param, (2, 3)) assert result is param def test_create_param_shared_bad_ndim_raises_error(): from lasagne.utils import create_param param = theano.shared(np.array([[1, 2, 3], [4, 5, 6]])) with pytest.raises(ValueError): create_param(param, (2, 3, 4)) def test_create_param_callable_returns_return_value(): from lasagne.utils import create_param array = np.array([[1, 2, 3], [4, 5, 6]]) factory = Mock() factory.return_value = array result = create_param(factory, (2, 3)) assert (result.get_value() == array).all() factory.assert_called_with((2, 3)) def test_create_param_callable_returns_shared(): from lasagne.utils import create_param array = np.array([[1, 2, 3], [4, 5, 6]]) param = theano.shared(array) factory = Mock() factory.return_value = param result = create_param(factory, (2, 3)) assert (result.get_value() == array).all() factory.assert_called_with((2, 3)) assert result is param def test_create_param_callable_returns_shared_bad_ndim_raises_error(): from 
lasagne.utils import create_param array = np.array([[1, 2], [3, 4]]) param = theano.shared(array) factory = Mock() factory.return_value = param with pytest.raises(ValueError): create_param(factory, (2, 3, 4)) def test_create_param_callable_returns_theano_expr(): from lasagne.utils import create_param array = np.array([[1, 2, 3], [4, 5, 6]]) param = theano.shared(array) * 2 factory = Mock() factory.return_value = param result = create_param(factory, (2, 3)) assert (result.eval() == array * 2).all() assert result is param def test_nonpositive_dims_raises_value_error(): from lasagne.utils import create_param neg_shape = (-1, -1) zero_shape = (0, 0) pos_shape = (1, 1) spec = np.empty with pytest.raises(ValueError): create_param(spec, neg_shape) with pytest.raises(ValueError): create_param(spec, zero_shape) create_param(spec, pos_shape) def test_create_param_callable_returns_wrong_type(): from lasagne.utils import create_param param = 'string' factory = Mock() factory.return_value = param with pytest.raises(TypeError): create_param(factory, (1, 2)) def test_create_param_retain_ndarray_dtype(): from lasagne.utils import create_param param = np.array([[1, 2, 3], [4, 5, 6]]) param = param.astype('float64') result = create_param(param, (2, 3)) assert (result.dtype == param.dtype) param = param.astype('int16') result = create_param(param, (2, 3)) assert (result.dtype == param.dtype) def test_create_param_broadcast_pattern(): from lasagne.utils import create_param for shape in (10, 1, 20), (1, 2), (3, 1), (2, 3): bcast = tuple(s == 1 for s in shape) assert create_param(np.zeros, shape).broadcastable == bcast assert create_param(np.zeros(shape, np.float32), shape).broadcastable == bcast def test_unroll_scan(): from lasagne.utils import unroll_scan k = 2 a = T.scalar("a") result = unroll_scan( fn=lambda step, prior_result, a: prior_result * a, sequences=T.arange(k), outputs_info=[1.], non_sequences=[a], n_steps=k) final_result = result[-1] power = theano.function(inputs=[a], outputs=final_result) assert np.all(power(10) == [10, 100]) b = T.scalar("b") def mul_div(step, previous_mul, previous_div, mul, div): return previous_mul*mul, previous_div/div result = unroll_scan( fn=mul_div, sequences=T.arange(k), outputs_info=[1., 1.], non_sequences=[a, b], n_steps=k) power = theano.function(inputs=[a, b], outputs=result) assert np.allclose(power(10, 10), [[10, 100], [.1, .01]]) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/test_objectives.py0000644000175000017500000002646113307306052027412 0ustar sinclairssinclairsimport numpy as np import theano import pytest @pytest.mark.parametrize('colvect', (False, True)) def test_binary_crossentropy(colvect): # symbolic version from lasagne.objectives import binary_crossentropy if not colvect: p, t = theano.tensor.matrices('pt') c = binary_crossentropy(p, t) else: # check that for convenience, comparing a prediction column vector # against a 1D target vector does not lead to broadcasting p, t = theano.tensor.vectors('pt') c = binary_crossentropy(p.dimshuffle(0, 'x'), t)[:, 0] # numeric version floatX = theano.config.floatX shape = (10, 20) if not colvect else (10,) predictions = np.random.rand(*shape).astype(floatX) targets = np.random.rand(*shape).astype(floatX) crossent = (- targets * np.log(predictions) - (1-targets) * np.log(1-predictions)) # compare assert np.allclose(crossent, c.eval({p: predictions, t: targets})) def test_categorical_crossentropy(): # symbolic version from lasagne.objectives import categorical_crossentropy p, t = 
theano.tensor.matrices('pt') c = categorical_crossentropy(p, t) # numeric version floatX = theano.config.floatX predictions = np.random.rand(10, 20).astype(floatX) predictions /= predictions.sum(axis=1, keepdims=True) targets = np.random.rand(10, 20).astype(floatX) targets /= targets.sum(axis=1, keepdims=True) crossent = -(targets * np.log(predictions)).sum(axis=-1) # compare assert np.allclose(crossent, c.eval({p: predictions, t: targets})) def test_categorical_crossentropy_onehot(): # symbolic version from lasagne.objectives import categorical_crossentropy p = theano.tensor.matrix('p') t = theano.tensor.ivector('t') # correct class per item c = categorical_crossentropy(p, t) # numeric version floatX = theano.config.floatX predictions = np.random.rand(10, 20).astype(floatX) predictions /= predictions.sum(axis=1, keepdims=True) targets = np.random.randint(20, size=10).astype(np.uint8) crossent = -np.log(predictions[np.arange(10), targets]) # compare assert np.allclose(crossent, c.eval({p: predictions, t: targets})) @pytest.mark.parametrize('colvect', (False, True)) def test_squared_error(colvect): # symbolic version from lasagne.objectives import squared_error if not colvect: a, b = theano.tensor.matrices('ab') c = squared_error(a, b) else: a, b = theano.tensor.vectors('ab') c = squared_error(a.dimshuffle(0, 'x'), b)[:, 0] # numeric version floatX = theano.config.floatX shape = (10, 20) if not colvect else (10,) x = np.random.rand(*shape).astype(floatX) y = np.random.rand(*shape).astype(floatX) z = (x - y)**2 # compare assert np.allclose(z, c.eval({a: x, b: y})) def test_squared_error_preserve_dtype(): from lasagne.objectives import squared_error for dtype in 'float64', 'float32', 'float16': a = theano.tensor.matrix('a', dtype=dtype) b = theano.tensor.matrix('b', dtype=dtype) assert squared_error(a, b).dtype == dtype def test_aggregate_mean(): from lasagne.objectives import aggregate x = theano.tensor.matrix('x') assert theano.gof.graph.is_same_graph(aggregate(x), x.mean()) assert theano.gof.graph.is_same_graph(aggregate(x, mode='mean'), x.mean()) def test_aggregate_sum(): from lasagne.objectives import aggregate x = theano.tensor.matrix('x') assert theano.gof.graph.is_same_graph(aggregate(x, mode='sum'), x.sum()) def test_aggregate_weighted_mean(): from lasagne.objectives import aggregate x = theano.tensor.matrix('x') w = theano.tensor.matrix('w') assert theano.gof.graph.is_same_graph(aggregate(x, w), (x * w).mean()) assert theano.gof.graph.is_same_graph(aggregate(x, w, mode='mean'), (x * w).mean()) def test_aggregate_weighted_sum(): from lasagne.objectives import aggregate x = theano.tensor.matrix('x') w = theano.tensor.matrix('w') assert theano.gof.graph.is_same_graph(aggregate(x, w, mode='sum'), (x * w).sum()) def test_aggregate_weighted_normalized_sum(): from lasagne.objectives import aggregate x = theano.tensor.matrix('x') w = theano.tensor.matrix('w') assert theano.gof.graph.is_same_graph(aggregate(x, w, 'normalized_sum'), (x * w).sum() / w.sum()) def test_aggregate_invalid(): from lasagne.objectives import aggregate with pytest.raises(ValueError) as exc: aggregate(theano.tensor.matrix(), mode='asdf') assert 'mode must be' in exc.value.args[0] with pytest.raises(ValueError) as exc: aggregate(theano.tensor.matrix(), mode='normalized_sum') assert 'require weights' in exc.value.args[0] @pytest.mark.parametrize('colvect', (False, True)) def test_binary_hinge_loss(colvect): from lasagne.objectives import binary_hinge_loss p = theano.tensor.vector('p') t = theano.tensor.ivector('t') if 
not colvect: c = binary_hinge_loss(p, t, log_odds=True) else: c = binary_hinge_loss(p.dimshuffle(0, 'x'), t, log_odds=True)[:, 0] # numeric version floatX = theano.config.floatX predictions = np.random.rand(10).astype(floatX) targets = np.random.random_integers(0, 1, (10,)).astype("int8") hinge = np.maximum(0, 1 - predictions * (2 * targets - 1)) # compare assert np.allclose(hinge, c.eval({p: predictions, t: targets})) @pytest.mark.parametrize('colvect', (False, True)) @pytest.mark.parametrize('delta', (0.5, 1.0)) def test_huber_loss(colvect, delta): from lasagne.objectives import huber_loss if not colvect: a, b = theano.tensor.matrices('ab') l = huber_loss(a, b, delta) else: a, b = theano.tensor.vectors('ab') l = huber_loss(a.dimshuffle(0, 'x'), b, delta)[:, 0] # numeric version floatX = theano.config.floatX shape = (10, 20) if not colvect else (10,) x = np.random.rand(*shape).astype(floatX) y = np.random.rand(*shape).astype(floatX) abs_diff = abs(x - y) ift = 0.5 * abs_diff ** 2 iff = delta * (abs_diff - delta / 2.) z = np.where(abs_diff <= delta, ift, iff) # compare assert np.allclose(z, l.eval({a: x, b: y})) @pytest.mark.parametrize('colvect', (False, True)) def test_binary_hinge_loss_not_binary_targets(colvect): from lasagne.objectives import binary_hinge_loss p = theano.tensor.vector('p') t = theano.tensor.ivector('t') if not colvect: c = binary_hinge_loss(p, t, log_odds=True, binary=False) else: c = binary_hinge_loss(p.dimshuffle(0, 'x'), t, log_odds=True, binary=False)[:, 0] # numeric version floatX = theano.config.floatX predictions = np.random.rand(10, ).astype(floatX) targets = np.random.random_integers(0, 1, (10, )).astype("int8") targets = 2 * targets - 1 hinge = np.maximum(0, 1 - predictions * targets) # compare assert np.allclose(hinge, c.eval({p: predictions, t: targets})) def test_binary_hinge_loss_sigmoid_predictions(): from lasagne.objectives import binary_hinge_loss p = theano.tensor.vector('p') t = theano.tensor.ivector('t') c = binary_hinge_loss(p, t, log_odds=False) # numeric version floatX = theano.config.floatX predictions = np.random.rand(10, ).astype(floatX) targets = np.random.random_integers(0, 1, (10, )).astype("int8") targets2 = 2 * targets - 1 hinge = np.maximum(0, 1 - np.log(predictions / (1-predictions)) * targets2) # compare assert np.allclose(hinge, c.eval({p: predictions, t: targets})) def test_multiclass_hinge_loss(): from lasagne.objectives import multiclass_hinge_loss from lasagne.nonlinearities import rectify p = theano.tensor.matrix('p') t = theano.tensor.ivector('t') c = multiclass_hinge_loss(p, t) # numeric version floatX = theano.config.floatX predictions = np.random.rand(10, 20).astype(floatX) targets = np.random.random_integers(0, 19, (10,)).astype("int8") one_hot = np.zeros((10, 20)) one_hot[np.arange(10), targets] = 1 correct = predictions[one_hot > 0] rest = predictions[one_hot < 1].reshape((10, 19)) rest = np.max(rest, axis=1) hinge = rectify(1 + rest - correct) # compare assert np.allclose(hinge, c.eval({p: predictions, t: targets})) def test_multiclass_hinge_loss_invalid(): from lasagne.objectives import multiclass_hinge_loss with pytest.raises(TypeError) as exc: multiclass_hinge_loss(theano.tensor.vector(), theano.tensor.matrix()) assert 'rank mismatch' in exc.value.args[0] @pytest.mark.parametrize('colvect', (False, True)) def test_binary_accuracy(colvect): from lasagne.objectives import binary_accuracy p = theano.tensor.vector('p') t = theano.tensor.ivector('t') if not colvect: c = binary_accuracy(p, t) else: c = 
binary_accuracy(p.dimshuffle(0, 'x'), t)[:, 0] # numeric version floatX = theano.config.floatX predictions = np.random.rand(10, ).astype(floatX) > 0.5 targets = np.random.random_integers(0, 1, (10,)).astype("int8") accuracy = predictions == targets # compare assert np.allclose(accuracy, c.eval({p: predictions, t: targets})) def test_categorical_accuracy(): from lasagne.objectives import categorical_accuracy p = theano.tensor.matrix('p') t = theano.tensor.ivector('t') c = categorical_accuracy(p, t) # numeric version floatX = theano.config.floatX predictions = np.random.rand(100, 5).astype(floatX) cls_predictions = np.argmax(predictions, axis=1) targets = np.random.random_integers(0, 4, (100,)).astype("int8") accuracy = cls_predictions == targets # compare assert np.allclose(accuracy, c.eval({p: predictions, t: targets})) one_hot = np.zeros((100, 5)).astype("int8") one_hot[np.arange(100), targets] = 1 t = theano.tensor.imatrix('t') c = categorical_accuracy(p, t) assert np.allclose(accuracy, c.eval({p: predictions, t: one_hot})) def test_categorical_accuracy_top_k(): from lasagne.objectives import categorical_accuracy p = theano.tensor.matrix('p') t = theano.tensor.ivector('t') top_k = 4 c = categorical_accuracy(p, t, top_k=top_k) # numeric version floatX = theano.config.floatX predictions = np.random.rand(10, 20).astype(floatX) cls_predictions = np.argsort(predictions, axis=1).astype("int8") # (construct targets such that top-1 to top-10 predictions are in there) targets = cls_predictions[np.arange(10), -np.random.permutation(10)] top_predictions = cls_predictions[:, -top_k:] accuracy = np.any(top_predictions == targets[:, np.newaxis], axis=1) # compare assert np.allclose(accuracy, c.eval({p: predictions, t: targets})) one_hot = np.zeros((10, 20)).astype("int8") one_hot[np.arange(10), targets] = 1 t = theano.tensor.imatrix('t') c = categorical_accuracy(p, t, top_k=top_k) assert np.allclose(accuracy, c.eval({p: predictions, t: one_hot})) def test_categorial_accuracy_invalid(): from lasagne.objectives import categorical_accuracy with pytest.raises(TypeError) as exc: categorical_accuracy(theano.tensor.vector(), theano.tensor.matrix()) assert 'rank mismatch' in exc.value.args[0] Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/conftest.py0000644000175000017500000000043513307306052026034 0ustar sinclairssinclairsimport pytest def pytest_addoption(parser): parser.addoption("--runslow", action="store_true", help="run slow tests") def pytest_runtest_setup(item): if 'slow' in item.keywords and not item.config.getoption("--runslow"): pytest.skip("need --runslow option to run") Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/test_init.py0000644000175000017500000002240613307306052026213 0ustar sinclairssinclairsimport pytest def test_initializer_sample(): from lasagne.init import Initializer with pytest.raises(NotImplementedError): Initializer().sample((100, 100)) def test_shape(): from lasagne.init import Initializer # Assert that all `Initializer` sublasses return the shape that # we've asked for in `sample`: for klass in Initializer.__subclasses__(): if len(klass.__subclasses__()): # check HeNormal, HeUniform, GlorotNormal, GlorotUniform for sub_klass in klass.__subclasses__(): assert sub_klass().sample((12, 23)).shape == (12, 23) else: assert klass().sample((12, 23)).shape == (12, 23) def test_specified_rng(): from lasagne.random import get_rng, set_rng from lasagne.init import (Normal, Uniform, GlorotNormal, GlorotUniform, Sparse, Orthogonal) from numpy.random import 
RandomState from numpy import allclose seed = 123456789 rng = get_rng() for init_class in [Normal, Uniform, GlorotNormal, GlorotUniform, Sparse, Orthogonal]: set_rng(RandomState(seed)) sample1 = init_class().sample((100, 100)) set_rng(RandomState(seed)) sample2 = init_class().sample((100, 100)) set_rng(rng) # reset to original RNG for other tests assert allclose(sample1, sample2),\ ("random initialization was inconsistent for {}" .format(init_class.__name__)) def test_normal(): from lasagne.init import Normal sample = Normal().sample((100, 200)) assert -0.001 < sample.mean() < 0.001 assert 0.009 < sample.std() < 0.011 def test_uniform_range_as_number(): from lasagne.init import Uniform sample = Uniform(1.0).sample((300, 400)) assert sample.shape == (300, 400) assert -1.0 <= sample.min() < -0.9 assert 0.9 < sample.max() <= 1.0 def test_uniform_range_as_range(): from lasagne.init import Uniform sample = Uniform((0.0, 1.0)).sample((300, 400)) assert sample.shape == (300, 400) assert 0.0 <= sample.min() < 0.1 assert 0.9 < sample.max() <= 1.0 def test_uniform_mean_std(): from lasagne.init import Uniform sample = Uniform(std=1.0, mean=5.0).sample((300, 400)) assert 4.9 < sample.mean() < 5.1 assert 0.9 < sample.std() < 1.1 def test_glorot_normal(): from lasagne.init import GlorotNormal sample = GlorotNormal().sample((100, 100)) assert -0.01 < sample.mean() < 0.01 assert 0.09 < sample.std() < 0.11 def test_glorot_1d_not_supported(): from lasagne.init import GlorotNormal with pytest.raises(RuntimeError): GlorotNormal().sample((100,)) def test_glorot_normal_receptive_field(): from lasagne.init import GlorotNormal sample = GlorotNormal().sample((50, 50, 2)) assert -0.01 < sample.mean() < 0.01 assert 0.09 < sample.std() < 0.11 def test_glorot_normal_gain(): from lasagne.init import GlorotNormal sample = GlorotNormal(gain=10.0).sample((100, 100)) assert -0.1 < sample.mean() < 0.1 assert 0.9 < sample.std() < 1.1 sample = GlorotNormal(gain='relu').sample((100, 100)) assert -0.01 < sample.mean() < 0.01 assert 0.132 < sample.std() < 0.152 def test_glorot_normal_c01b(): from lasagne.init import GlorotNormal sample = GlorotNormal(c01b=True).sample((25, 2, 2, 25)) assert -0.01 < sample.mean() < 0.01 assert 0.09 < sample.std() < 0.11 def test_glorot_normal_c01b_4d_only(): from lasagne.init import GlorotNormal with pytest.raises(RuntimeError): GlorotNormal(c01b=True).sample((100,)) with pytest.raises(RuntimeError): GlorotNormal(c01b=True).sample((100, 100)) with pytest.raises(RuntimeError): GlorotNormal(c01b=True).sample((100, 100, 100)) def test_glorot_uniform(): from lasagne.init import GlorotUniform sample = GlorotUniform().sample((150, 450)) assert -0.1 <= sample.min() < -0.09 assert 0.09 < sample.max() <= 0.1 def test_glorot_uniform_receptive_field(): from lasagne.init import GlorotUniform sample = GlorotUniform().sample((150, 150, 2)) assert -0.10 <= sample.min() < -0.09 assert 0.09 < sample.max() <= 0.10 def test_glorot_uniform_gain(): from lasagne.init import GlorotUniform sample = GlorotUniform(gain=10.0).sample((150, 450)) assert -1.0 <= sample.min() < -0.9 assert 0.9 < sample.max() <= 1.0 sample = GlorotUniform(gain='relu').sample((100, 100)) assert -0.01 < sample.mean() < 0.01 assert 0.132 < sample.std() < 0.152 def test_glorot_uniform_c01b(): from lasagne.init import GlorotUniform sample = GlorotUniform(c01b=True).sample((75, 2, 2, 75)) assert -0.1 <= sample.min() < -0.09 assert 0.09 < sample.max() <= 0.1 def test_glorot_uniform_c01b_4d_only(): from lasagne.init import GlorotUniform with 
pytest.raises(RuntimeError): GlorotUniform(c01b=True).sample((100,)) with pytest.raises(RuntimeError): GlorotUniform(c01b=True).sample((100, 100)) with pytest.raises(RuntimeError): GlorotUniform(c01b=True).sample((100, 100, 100)) def test_he_normal(): from lasagne.init import HeNormal sample = HeNormal().sample((100, 100)) assert -0.01 < sample.mean() < 0.01 assert 0.09 < sample.std() < 0.11 def test_he_1d_not_supported(): from lasagne.init import HeNormal with pytest.raises(RuntimeError): HeNormal().sample((100,)) def test_he_normal_receptive_field(): from lasagne.init import HeNormal sample = HeNormal().sample((50, 50, 2)) assert -0.01 < sample.mean() < 0.01 assert 0.09 < sample.std() < 0.11 def test_he_normal_gain(): from lasagne.init import HeNormal sample = HeNormal(gain=10.0).sample((100, 100)) assert -0.1 < sample.mean() < 0.1 assert 0.9 < sample.std() < 1.1 sample = HeNormal(gain='relu').sample((200, 50)) assert -0.1 < sample.mean() < 0.1 assert 0.07 < sample.std() < 0.12 def test_he_normal_c01b(): from lasagne.init import HeNormal sample = HeNormal(c01b=True).sample((25, 2, 2, 25)) assert -0.01 < sample.mean() < 0.01 assert 0.09 < sample.std() < 0.11 def test_he_normal_c01b_4d_only(): from lasagne.init import HeNormal with pytest.raises(RuntimeError): HeNormal(c01b=True).sample((100,)) with pytest.raises(RuntimeError): HeNormal(c01b=True).sample((100, 100)) with pytest.raises(RuntimeError): HeNormal(c01b=True).sample((100, 100, 100)) def test_he_uniform(): from lasagne.init import HeUniform sample = HeUniform().sample((300, 200)) assert -0.1 <= sample.min() < -0.09 assert 0.09 < sample.max() <= 0.1 def test_he_uniform_receptive_field(): from lasagne.init import HeUniform sample = HeUniform().sample((150, 150, 2)) assert -0.10 <= sample.min() < -0.09 assert 0.09 < sample.max() <= 0.10 def test_he_uniform_gain(): from lasagne.init import HeUniform sample = HeUniform(gain=10.0).sample((300, 200)) assert -1.0 <= sample.min() < -0.9 assert 0.9 < sample.max() <= 1.0 sample = HeUniform(gain='relu').sample((100, 100)) assert -0.1 < sample.mean() < 0.1 assert 0.1 < sample.std() < 0.2 def test_he_uniform_c01b(): from lasagne.init import HeUniform sample = HeUniform(c01b=True).sample((75, 2, 2, 75)) assert -0.1 <= sample.min() < -0.09 assert 0.09 < sample.max() <= 0.1 def test_he_uniform_c01b_4d_only(): from lasagne.init import HeUniform with pytest.raises(RuntimeError): HeUniform(c01b=True).sample((100,)) with pytest.raises(RuntimeError): HeUniform(c01b=True).sample((100, 100)) with pytest.raises(RuntimeError): HeUniform(c01b=True).sample((100, 100, 100)) def test_constant(): from lasagne.init import Constant sample = Constant(1.0).sample((10, 20)) assert (sample == 1.0).all() def test_sparse(): from lasagne.init import Sparse sample = Sparse(sparsity=0.1).sample((10, 20)) assert (sample != 0.0).sum() == (10 * 20) * 0.1 def test_sparse_1d_not_supported(): from lasagne.init import Sparse with pytest.raises(RuntimeError): Sparse().sample((100,)) def test_orthogonal(): import numpy as np from lasagne.init import Orthogonal sample = Orthogonal().sample((100, 200)) assert np.allclose(np.dot(sample, sample.T), np.eye(100), atol=1e-6) sample = Orthogonal().sample((200, 100)) assert np.allclose(np.dot(sample.T, sample), np.eye(100), atol=1e-6) def test_orthogonal_gain(): import numpy as np from lasagne.init import Orthogonal gain = 2 sample = Orthogonal(gain).sample((100, 200)) assert np.allclose(np.dot(sample, sample.T), gain * gain * np.eye(100), atol=1e-6) gain = np.sqrt(2) sample = 
Orthogonal('relu').sample((100, 200)) assert np.allclose(np.dot(sample, sample.T), gain * gain * np.eye(100), atol=1e-6) def test_orthogonal_multi(): import numpy as np from lasagne.init import Orthogonal sample = Orthogonal().sample((100, 50, 80)) sample = sample.reshape(100, 50*80) assert np.allclose(np.dot(sample, sample.T), np.eye(100), atol=1e-6) def test_orthogonal_1d_not_supported(): from lasagne.init import Orthogonal with pytest.raises(RuntimeError): Orthogonal().sample((100,)) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/test_nonlinearities.py0000644000175000017500000000630213307306052030270 0ustar sinclairssinclairsimport pytest import numpy as np import theano.tensor as T class TestNonlinearities(object): def linear(self, x): return x def rectify(self, x): return x * (x > 0) def leaky_rectify(self, x): return x * (x > 0) + 0.01 * x * (x < 0) def leaky_rectify_0(self, x): return self.rectify(x) def elu(self, x, alpha=1): return np.where(x > 0, x, alpha * (np.expm1(x))) def selu(self, x, alpha=1, lmbda=1): return lmbda * np.where(x > 0, x, alpha * np.expm1(x)) def selu_paper(self, x): return self.selu(x, alpha=1.6732632423543772848170429916717, lmbda=1.0507009873554804934193349852946) def selu_rect(self, x): return self.selu(x, alpha=0, lmbda=1) def selu_custom(self, x): return self.selu(x, alpha=0.12, lmbda=1.21) def softplus(self, x): return np.log1p(np.exp(x)) def sigmoid(self, x): return 1 / (1 + np.exp(-x)) def tanh(self, x): return np.tanh(x) def scaled_tanh(self, x): return np.tanh(x) def scaled_tanh_p(self, x): return 2.27 * np.tanh(0.5 * x) def softmax(self, x): return (np.exp(x).T / np.exp(x).sum(-1)).T @pytest.mark.parametrize('nonlinearity', ['linear', 'rectify', 'leaky_rectify', 'elu', 'selu', 'selu_paper', 'selu_rect', 'selu_custom', 'sigmoid', 'tanh', 'scaled_tanh', 'softmax', 'leaky_rectify_0', 'scaled_tanh_p', 'softplus']) def test_nonlinearity(self, nonlinearity): import lasagne.nonlinearities if nonlinearity == 'leaky_rectify_0': from lasagne.nonlinearities import LeakyRectify theano_nonlinearity = LeakyRectify(leakiness=0) elif nonlinearity == 'scaled_tanh': from lasagne.nonlinearities import ScaledTanH theano_nonlinearity = ScaledTanH() elif nonlinearity == 'scaled_tanh_p': from lasagne.nonlinearities import ScaledTanH theano_nonlinearity = ScaledTanH(scale_in=0.5, scale_out=2.27) elif nonlinearity.startswith('selu'): from lasagne.nonlinearities import SELU, selu if nonlinearity == 'selu': theano_nonlinearity = SELU() elif nonlinearity == 'selu_paper': theano_nonlinearity = selu elif nonlinearity == 'selu_rect': theano_nonlinearity = SELU(scale=1, scale_neg=0) elif nonlinearity == 'selu_custom': theano_nonlinearity = SELU(scale=1.21, scale_neg=0.12) else: theano_nonlinearity = getattr(lasagne.nonlinearities, nonlinearity) np_nonlinearity = getattr(self, nonlinearity) X = T.matrix() X0 = lasagne.utils.floatX(np.random.uniform(-3, 3, (10, 10))) theano_result = theano_nonlinearity(X).eval({X: X0}) np_result = np_nonlinearity(X0) assert np.allclose(theano_result, np_result) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/test_theano_extensions.py0000644000175000017500000001155313307306052031006 0ustar sinclairssinclairsimport pytest import numpy as np import theano.tensor as T import lasagne def conv1d(input, kernel, stride=1): output = [] for b in input: temp = [] for c in kernel: temp.append( np.convolve(b[0, :], c[0, :], mode='valid')) output.append(temp) return np.array(output)[:, :, ::stride] @pytest.mark.parametrize('impl', 
['conv1d_sc', 'conv1d_mc0', 'conv1d_mc1', 'conv1d_unstrided', 'conv1d_sd', 'conv1d_md']) @pytest.mark.parametrize('filter_flip', [True, False]) @pytest.mark.parametrize('stride', [1, 2]) def test_conv(impl, stride, filter_flip): import lasagne.theano_extensions.conv conv = getattr(lasagne.theano_extensions.conv, impl) X = T.tensor3() W = T.tensor3() input = lasagne.utils.floatX(np.ones((1, 1, 10))) kernel = lasagne.utils.floatX(np.random.uniform(-1, 1, (2, 1, 6))) conv_theano = conv(X, W, input.shape, kernel.shape, subsample=(stride,), filter_flip=filter_flip).eval({X: input, W: kernel}) conv_np = conv1d(input, kernel, stride) assert np.allclose(conv_theano, conv_np) @pytest.mark.parametrize('impl', ['conv1d_sc', 'conv1d_mc0', 'conv1d_mc1']) def test_conv_nones(impl): import lasagne.theano_extensions.conv conv = getattr(lasagne.theano_extensions.conv, impl) X = T.tensor3() W = T.tensor3() input = lasagne.utils.floatX(np.ones((1, 1, 12))) kernel = lasagne.utils.floatX(np.random.uniform(-1, 1, (2, 1, 3))) conv_theano = conv(X, W, None, None).eval({ X: input, W: kernel }) conv_np = conv1d(input, kernel) assert np.allclose(conv_theano, conv_np) @pytest.mark.parametrize('impl', ['conv1d_mc0', 'conv1d_mc1']) @pytest.mark.parametrize('pad', [1, (2,)]) def test_conv_pad(impl, pad): import lasagne.theano_extensions.conv conv = getattr(lasagne.theano_extensions.conv, impl) X = T.tensor3() W = T.tensor3() input = lasagne.utils.floatX(np.ones((1, 1, 12))) kernel = lasagne.utils.floatX(np.random.uniform(-1, 1, (2, 1, 3))) conv_theano = conv(X, W, input.shape, kernel.shape, border_mode=pad).eval({ X: input, W: kernel }) pad = pad[0] if isinstance(pad, tuple) else pad input = np.pad(input, [(0, 0), (0, 0), (pad, pad)], mode='constant') conv_np = conv1d(input, kernel) assert np.allclose(conv_theano, conv_np) @pytest.mark.parametrize('impl', ['conv1d_sc', 'conv1d_mc0', 'conv1d_mc1', 'conv1d_unstrided', 'conv1d_sd', 'conv1d_md']) def test_conv_invalid_border_mode(impl): import lasagne.theano_extensions.conv conv = getattr(lasagne.theano_extensions.conv, impl) X = T.tensor3() W = T.tensor3() with pytest.raises(Exception): conv(X, W, (1, 1, 10), (2, 1, 3), border_mode=None) @pytest.mark.parametrize('impl', ['conv1d_unstrided', 'conv1d_sd', 'conv1d_md']) def test_conv_stride(impl): import lasagne.theano_extensions.conv conv = getattr(lasagne.theano_extensions.conv, impl) X = T.tensor3() W = T.tensor3() with pytest.raises(Exception): conv(X, W, (1, 1, 10), (2, 1, 3), subsample=(2,)) @pytest.mark.parametrize('val', [0, 7]) @pytest.mark.parametrize('batch_ndim', [1, 2]) def test_pad(batch_ndim, val, width=3): from lasagne.theano_extensions.padding import pad X = T.tensor4() X0 = lasagne.utils.floatX(np.ones((2, 3, 4, 5))) X_pad_theano = pad(X, width, val, batch_ndim).eval({X: X0}) pads = tuple((width, width) if i >= batch_ndim else (0, 0) for i, _ in enumerate(X0.shape)) X_pad_np = np.pad(X0, pads, mode='constant', constant_values=val) assert (X_pad_theano == X_pad_np).all() @pytest.mark.parametrize('batch_ndim', [1, 2]) def test_pad_width_per_axis(batch_ndim, val=0): from lasagne.theano_extensions.padding import pad width = (1, 2, 3, 4) X = T.tensor4() X0 = lasagne.utils.floatX(np.ones((2, 3, 4, 5))) X_pad_theano = pad(X, width[batch_ndim:], val, batch_ndim).eval({X: X0}) pads = tuple((w, w) if i >= batch_ndim else (0, 0) for i, w in enumerate(width)) X_pad_np = np.pad(X0, pads, mode='constant', constant_values=val) assert (X_pad_theano == X_pad_np).all() @pytest.mark.parametrize('batch_ndim', [1, 2]) def 
test_pad_width_per_border(batch_ndim, val=0): from lasagne.theano_extensions.padding import pad width = [(1, 2), (3, 4), (1, 2), (3, 4)] X = T.tensor4() X0 = lasagne.utils.floatX(np.ones((2, 3, 4, 5))) X_pad_theano = pad(X, width[batch_ndim:], val, batch_ndim).eval({X: X0}) pads = tuple(w if i >= batch_ndim else (0, 0) for i, w in enumerate(width)) X_pad_np = np.pad(X0, pads, mode='constant', constant_values=val) assert (X_pad_theano == X_pad_np).all() Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/test_examples.py0000644000175000017500000000176513307306052027073 0ustar sinclairssinclairsfrom glob import glob from importlib import import_module from os.path import basename from os.path import dirname from os.path import join from os.path import splitext import sys import pytest EXAMPLES_DIR = join(dirname(dirname(dirname(__file__))), 'examples') def _example_modules(): paths = glob(join(EXAMPLES_DIR, "*py")) return [splitext(basename(path))[0] for path in paths] @pytest.fixture def example(request): sys.path.insert(0, EXAMPLES_DIR) request.addfinalizer(lambda: sys.path.remove(EXAMPLES_DIR)) @pytest.mark.slow @pytest.mark.parametrize("module_name", _example_modules()) def test_example(example, module_name): try: main = getattr(import_module(module_name), 'main') except ImportError as e: skip_exceptions = ["requires a GPU", "pylearn2", "dnn not available"] if any([text in str(e) for text in skip_exceptions]): pytest.skip(e) else: raise main(num_epochs=1) # run the example for one iteration Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/0000755000175000017500000000000013307306052025132 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_helper.py0000644000175000017500000010267613307306052030036 0ustar sinclairssinclairsimport warnings from mock import Mock, PropertyMock import pytest import numpy import theano class TestGetAllLayers: def test_stack(self): from lasagne.layers import InputLayer, DenseLayer, get_all_layers from itertools import permutations # l1 --> l2 --> l3 l1 = InputLayer((10, 20)) l2 = DenseLayer(l1, 30) l3 = DenseLayer(l2, 40) # try all possible combinations and orders for a query for count in (0, 1, 2, 3): for query in permutations([l1, l2, l3], count): if l3 in query: expected = [l1, l2, l3] elif l2 in query: expected = [l1, l2] elif l1 in query: expected = [l1] else: expected = [] assert get_all_layers(query) == expected # treat_as_input=[l2] should block l1 from appearing assert get_all_layers(l3, treat_as_input=[l2]) == [l2, l3] def test_merge(self): from lasagne.layers import (InputLayer, DenseLayer, ElemwiseSumLayer, get_all_layers) # l1 --> l2 --> l3 --> l6 # l4 --> l5 ----^ l1 = InputLayer((10, 20)) l2 = DenseLayer(l1, 30) l3 = DenseLayer(l2, 40) l4 = InputLayer((10, 30)) l5 = DenseLayer(l4, 40) l6 = ElemwiseSumLayer([l3, l5]) # try various combinations and orders for a query assert get_all_layers(l6) == [l1, l2, l3, l4, l5, l6] assert get_all_layers([l4, l6]) == [l4, l1, l2, l3, l5, l6] assert get_all_layers([l5, l6]) == [l4, l5, l1, l2, l3, l6] assert get_all_layers([l4, l2, l5, l6]) == [l4, l1, l2, l5, l3, l6] # check that treat_as_input correctly blocks the search assert get_all_layers(l6, treat_as_input=[l2]) == [l2, l3, l4, l5, l6] assert get_all_layers(l6, treat_as_input=[l3, l5]) == [l3, l5, l6] assert get_all_layers([l6, l2], treat_as_input=[l6]) == [l6, l1, l2] def test_split(self): from lasagne.layers import InputLayer, DenseLayer, get_all_layers # l1 --> l2 --> l3 # 
\---> l4 l1 = InputLayer((10, 20)) l2 = DenseLayer(l1, 30) l3 = DenseLayer(l2, 40) l4 = DenseLayer(l1, 50) # try various combinations and orders for a query assert get_all_layers(l3) == [l1, l2, l3] assert get_all_layers(l4) == [l1, l4] assert get_all_layers([l3, l4]) == [l1, l2, l3, l4] assert get_all_layers([l4, l3]) == [l1, l4, l2, l3] # check that treat_as_input correctly blocks the search assert get_all_layers(l3, treat_as_input=[l2]) == [l2, l3] assert get_all_layers([l3, l4], treat_as_input=[l2]) == [l2, l3, l1, l4] def test_bridge(self): from lasagne.layers import (InputLayer, DenseLayer, ElemwiseSumLayer, get_all_layers) # l1 --> l2 --> l3 --> l4 --> l5 # \------------^ l1 = InputLayer((10, 20)) l2 = DenseLayer(l1, 30) l3 = DenseLayer(l2, 30) l4 = ElemwiseSumLayer([l2, l3]) l5 = DenseLayer(l4, 40) # check for correct topological order assert get_all_layers(l5) == [l1, l2, l3, l4, l5] # check that treat_as_input=[l4] blocks the search and =[l3] does not assert get_all_layers(l5, treat_as_input=[l4]) == [l4, l5] assert get_all_layers(l5, treat_as_input=[l3]) == [l1, l2, l3, l4, l5] class TestGetOutput_InputLayer: @pytest.fixture def get_output(self): from lasagne.layers.helper import get_output return get_output @pytest.fixture def layer(self): from lasagne.layers.input import InputLayer return InputLayer((3, 2)) def test_get_output_without_arguments(self, layer, get_output): assert get_output(layer) is layer.input_var def test_get_output_input_is_variable(self, layer, get_output): variable = theano.Variable("myvariable") assert get_output(layer, variable) is variable def test_get_output_input_is_array(self, layer, get_output): inputs = [[1, 2, 3]] output = get_output(layer, inputs) assert numpy.all(output.eval() == inputs) def test_get_output_input_is_a_mapping(self, layer, get_output): inputs = {layer: theano.tensor.matrix()} assert get_output(layer, inputs) is inputs[layer] class TestGetOutput_Layer: @pytest.fixture def get_output(self): from lasagne.layers.helper import get_output return get_output @pytest.fixture def layers(self): from lasagne.layers.base import Layer from lasagne.layers.input import InputLayer # create a mock that has the same attributes as an InputLayer instance l1 = Mock(InputLayer((None,)), output_shape=(None,), get_output_kwargs=[]) # create a mock that has the same attributes as a Layer instance l2 = Mock(Layer(l1), output_shape=(None,), get_output_kwargs=[]) # link it to the InputLayer mock l2.input_layer = l1 # create another mock that has the same attributes as a Layer instance l3 = Mock(Layer(l2), output_shape=(None,), get_output_kwargs=['kwarg']) # link it to the first mock, to get an "l1 --> l2 --> l3" chain l3.input_layer = l2 return l1, l2, l3 def test_get_output_without_arguments(self, layers, get_output): l1, l2, l3 = layers output = get_output(l3) # expected: l3.get_output_for(l2.get_output_for(l1.input_var)) assert output is l3.get_output_for.return_value l3.get_output_for.assert_called_with( l2.get_output_for.return_value) l2.get_output_for.assert_called_with( l1.input_var) def test_get_output_with_single_argument(self, layers, get_output): l1, l2, l3 = layers inputs, kwarg = theano.tensor.matrix(), object() output = get_output(l3, inputs, kwarg=kwarg) # expected: l3.get_output_for(l2.get_output_for(inputs, kwarg=kwarg), # kwarg=kwarg) assert output is l3.get_output_for.return_value l3.get_output_for.assert_called_with( l2.get_output_for.return_value, kwarg=kwarg) l2.get_output_for.assert_called_with( inputs, kwarg=kwarg) def 
test_get_output_input_is_a_mapping(self, layers, get_output): l1, l2, l3 = layers p = PropertyMock() type(l1).input_var = p inputs = {l3: theano.tensor.matrix()} # expected: inputs[l3] assert get_output(l3, inputs) is inputs[l3] # l3.get_output_for, l2.get_output_for should not have been called assert l3.get_output_for.call_count == 0 assert l2.get_output_for.call_count == 0 # l1.input_var should not have been accessed assert p.call_count == 0 def test_get_output_input_is_a_mapping_no_key(self, layers, get_output): l1, l2, l3 = layers output = get_output(l3, {}) # expected: l3.get_output_for(l2.get_output_for(l1.input_var)) assert output is l3.get_output_for.return_value l3.get_output_for.assert_called_with( l2.get_output_for.return_value) l2.get_output_for.assert_called_with( l1.input_var) def test_get_output_input_is_a_mapping_to_array(self, layers, get_output): l1, l2, l3 = layers p = PropertyMock() type(l1).input_var = p inputs = {l3: [[1, 2, 3]]} output = get_output(l3, inputs) # expected: inputs[l3] assert numpy.all(output.eval() == inputs[l3]) # l3.get_output_for, l2.get_output_for should not have been called assert l3.get_output_for.call_count == 0 assert l2.get_output_for.call_count == 0 # l1.input_var should not have been accessed assert p.call_count == 0 def test_get_output_input_is_a_mapping_for_layer(self, layers, get_output): l1, l2, l3 = layers p = PropertyMock() type(l1).input_var = p input_expr, kwarg = theano.tensor.matrix(), object() inputs = {l2: input_expr} output = get_output(l3, inputs, kwarg=kwarg) # expected: l3.get_output_for(input_expr, kwarg=kwarg) assert output is l3.get_output_for.return_value l3.get_output_for.assert_called_with(input_expr, kwarg=kwarg) # l2.get_output_for should not have been called assert l2.get_output_for.call_count == 0 # l1.input_var should not have been accessed assert p.call_count == 0 def test_get_output_input_is_a_mapping_for_input_layer(self, layers, get_output): l1, l2, l3 = layers p = PropertyMock() type(l1).input_var = p input_expr, kwarg = theano.tensor.matrix(), object() inputs = {l1: input_expr} output = get_output(l3, inputs, kwarg=kwarg) # expected: l3.get_output_for(l2.get_output_for(input_expr, # kwarg=kwarg), # kwarg=kwarg) assert output is l3.get_output_for.return_value l3.get_output_for.assert_called_with( l2.get_output_for.return_value, kwarg=kwarg) l2.get_output_for.assert_called_with( input_expr, kwarg=kwarg) # l1.input_var should not have been accessed assert p.call_count == 0 def test_get_output_with_unused_kwarg(self, layers, get_output): l1, l2, l3 = layers l2.get_output_for = lambda data, asdf=123, **kwargs: data unused_kwarg = object() with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') get_output(l3, kwagg=unused_kwarg) assert len(w) == 1 assert issubclass(w[0].category, UserWarning) assert 'perhaps you meant kwarg' in str(w[0].message) with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') get_output(l3, adsf=unused_kwarg) assert len(w) == 1 assert issubclass(w[0].category, UserWarning) assert 'perhaps you meant asdf' in str(w[0].message) def test_get_output_with_no_unused_kwarg(self, layers, get_output): l1, l2, l3 = layers with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') get_output(l3) assert len(w) == 0 @pytest.fixture def layer_from_shape(self): from lasagne.layers.base import Layer return Layer((None, 20)) def test_layer_from_shape_invalid_get_output(self, layer_from_shape, get_output): from lasagne.layers.base import 
Layer layer = layer_from_shape with pytest.raises(ValueError): get_output(layer) with pytest.raises(ValueError): get_output(layer, [1, 2]) with pytest.raises(ValueError): get_output(layer, {Mock(spec=Layer): [1, 2]}) def test_layer_from_shape_valid_get_output(self, layer_from_shape, get_output): layer = layer_from_shape inputs = {layer: theano.tensor.matrix()} assert get_output(layer, inputs) is inputs[layer] inputs = {None: theano.tensor.matrix()} layer.get_output_for = Mock() assert get_output(layer, inputs) is layer.get_output_for.return_value layer.get_output_for.assert_called_with(inputs[None]) class TestGetOutput_MergeLayer: @pytest.fixture def get_output(self): from lasagne.layers.helper import get_output return get_output @pytest.fixture def layers(self): from lasagne.layers.base import Layer, MergeLayer from lasagne.layers.input import InputLayer # create two mocks of the same attributes as an InputLayer instance l1 = [Mock(InputLayer((None,)), output_shape=(None,), get_output_kwargs=[]), Mock(InputLayer((None,)), output_shape=(None,), get_output_kwargs=[])] # create two mocks of the same attributes as a Layer instance l2 = [Mock(Layer(l1[0]), output_shape=(None,), get_output_kwargs=[]), Mock(Layer(l1[1]), output_shape=(None,), get_output_kwargs=[])] # link them to the InputLayer mocks l2[0].input_layer = l1[0] l2[1].input_layer = l1[1] # create a mock that has the same attributes as a MergeLayer l3 = Mock(MergeLayer(l2), get_output_kwargs=['kwarg']) # link it to the two layer mocks, to get the following network: # l1[0] --> l2[0] --> l3 # l1[1] --> l2[1] ----^ l3.input_layers = l2 return l1, l2, l3 def test_get_output_without_arguments(self, layers, get_output): l1, l2, l3 = layers output = get_output(l3) # expected: l3.get_output_for([l2[0].get_output_for(l1[0].input_var), # l2[1].get_output_for(l1[1].input_var)]) assert output is l3.get_output_for.return_value l3.get_output_for.assert_called_with([ l2[0].get_output_for.return_value, l2[1].get_output_for.return_value, ]) l2[0].get_output_for.assert_called_with( l1[0].input_var) l2[1].get_output_for.assert_called_with( l1[1].input_var) def test_get_output_with_single_argument_fails(self, layers, get_output): l1, l2, l3 = layers inputs, kwarg = theano.tensor.matrix(), object() # expected to fail: only gave one expression for two input layers with pytest.raises(ValueError): output = get_output(l3, inputs, kwarg=kwarg) def test_get_output_input_is_a_mapping(self, layers, get_output): l1, l2, l3 = layers p = PropertyMock() type(l1[0]).input_var = p type(l1[1]).input_var = p inputs = {l3: theano.tensor.matrix()} # expected: inputs[l3] assert get_output(l3, inputs) is inputs[l3] # l3.get_output_for, l2[*].get_output_for should not have been called assert l3.get_output_for.call_count == 0 assert l2[0].get_output_for.call_count == 0 assert l2[1].get_output_for.call_count == 0 # l1[*].input_var should not have been accessed assert p.call_count == 0 def test_get_output_input_is_a_mapping_no_key(self, layers, get_output): l1, l2, l3 = layers output = get_output(l3, {}) # expected: l3.get_output_for([l2[0].get_output_for(l1[0].input_var), # l2[1].get_output_for(l1[1].input_var)]) assert output is l3.get_output_for.return_value l3.get_output_for.assert_called_with([ l2[0].get_output_for.return_value, l2[1].get_output_for.return_value, ]) l2[0].get_output_for.assert_called_with( l1[0].input_var) l2[1].get_output_for.assert_called_with( l1[1].input_var) def test_get_output_input_is_a_mapping_to_array(self, layers, get_output): l1, l2, l3 = 
layers p = PropertyMock() type(l1[0]).input_var = p type(l1[1]).input_var = p inputs = {l3: [[1, 2, 3]]} output = get_output(l3, inputs) # expected: inputs[l3] assert numpy.all(output.eval() == inputs[l3]) # l3.get_output_for, l2[*].get_output_for should not have been called assert l3.get_output_for.call_count == 0 assert l2[0].get_output_for.call_count == 0 assert l2[1].get_output_for.call_count == 0 # l1[*].input_var should not have been accessed assert p.call_count == 0 def test_get_output_input_is_a_mapping_for_layer(self, layers, get_output): l1, l2, l3 = layers p = PropertyMock() type(l1[0]).input_var = p input_expr, kwarg = theano.tensor.matrix(), object() inputs = {l2[0]: input_expr} output = get_output(l3, inputs, kwarg=kwarg) # expected: l3.get_output_for([input_expr, # l2[1].get_output_for(l1[1].input_var, # kwarg=kwarg)], # kwarg=kwarg) assert output is l3.get_output_for.return_value l3.get_output_for.assert_called_with([ input_expr, l2[1].get_output_for.return_value, ], kwarg=kwarg) l2[1].get_output_for.assert_called_with( l1[1].input_var, kwarg=kwarg) # l2[0].get_output_for should not have been called assert l2[0].get_output_for.call_count == 0 # l1[0].input_var should not have been accessed assert p.call_count == 0 def test_get_output_input_is_a_mapping_for_input_layer(self, layers, get_output): l1, l2, l3 = layers p = PropertyMock() type(l1[0]).input_var = p input_expr, kwarg = theano.tensor.matrix(), object() inputs = {l1[0]: input_expr} output = get_output(l3, inputs, kwarg=kwarg) # expected: l3.get_output_for([l2[0].get_output_for(input_expr, # kwarg=kwarg), # l2[1].get_output_for(l1[1].input_var, # kwarg=kwarg)], # kwarg=kwarg) assert output is l3.get_output_for.return_value l3.get_output_for.assert_called_with([ l2[0].get_output_for.return_value, l2[1].get_output_for.return_value, ], kwarg=kwarg) l2[0].get_output_for.assert_called_with( input_expr, kwarg=kwarg) l2[1].get_output_for.assert_called_with( l1[1].input_var, kwarg=kwarg) # l1[0].input_var should not have been accessed assert p.call_count == 0 @pytest.fixture def layer_from_shape(self): from lasagne.layers.input import InputLayer from lasagne.layers.base import MergeLayer return MergeLayer([ (None, 20), Mock(InputLayer((None,)), output_shape=(None,))]) def test_layer_from_shape_invalid_get_output(self, layer_from_shape, get_output): layer = layer_from_shape with pytest.raises(ValueError): get_output(layer) with pytest.raises(ValueError): get_output(layer, [1, 2]) with pytest.raises(ValueError): get_output(layer, {layer.input_layers[1]: [1, 2]}) def test_layer_from_shape_valid_get_output(self, layer_from_shape, get_output): layer = layer_from_shape inputs = {layer: theano.tensor.matrix()} assert get_output(layer, inputs) is inputs[layer] inputs = {None: theano.tensor.matrix()} layer.get_output_for = Mock() assert get_output(layer, inputs) is layer.get_output_for.return_value layer.get_output_for.assert_called_with( [inputs[None], layer.input_layers[1].input_var]) def test_invalid_input_key(self, layer_from_shape, get_output): layer = layer_from_shape with pytest.raises(TypeError): get_output(layer, {Mock(): [1, 2]}) class TestGetOutputShape_InputLayer: @pytest.fixture def get_output_shape(self): from lasagne.layers.helper import get_output_shape return get_output_shape @pytest.fixture def layer(self): from lasagne.layers.input import InputLayer return InputLayer((3, 2)) def test_get_output_shape_without_arguments(self, layer, get_output_shape): assert get_output_shape(layer) == (3, 2) def 
test_get_output_shape_input_is_tuple(self, layer, get_output_shape): shp = (4, 5, 6) assert get_output_shape(layer, shp) == shp def test_get_output_shape_input_is_a_mapping(self, layer, get_output_shape): input_shapes = {layer: (4, 5, 6)} assert get_output_shape(layer, input_shapes) == input_shapes[layer] class TestGetOutputShape_Layer: @pytest.fixture def get_output_shape(self): from lasagne.layers.helper import get_output_shape return get_output_shape @pytest.fixture def layers(self): from lasagne.layers.base import Layer from lasagne.layers.input import InputLayer # create a mock that has the same attributes as an InputLayer instance l1 = Mock(InputLayer((None,)), output_shape=(None,)) # create a mock that has the same attributes as a Layer instance l2 = Mock(Layer(l1), output_shape=(None,)) # link it to the InputLayer mock l2.input_layer = l1 # create another mock that has the same attributes as a Layer instance l3 = Mock(Layer(l2), output_shape=(None,)) # link it to the first mock, to get an "l1 --> l2 --> l3" chain l3.input_layer = l2 return l1, l2, l3 def test_get_output_shape_without_arguments(self, layers, get_output_shape): l1, l2, l3 = layers output_shape = get_output_shape(l3) # expected: l3.output_shape assert output_shape is l3.output_shape # l3.get_output_shape_for, l2.get_output_shape_for should not have been # called assert l3.get_output_shape_for.call_count == 0 assert l2.get_output_shape_for.call_count == 0 def test_get_output_shape_with_single_argument(self, layers, get_output_shape): l1, l2, l3 = layers shp = (3, 4, 5) output_shape = get_output_shape(l3, shp) # expected: l3.get_output_shape_for(l2.get_output_shape_for(shp)) assert output_shape is l3.get_output_shape_for.return_value l3.get_output_shape_for.assert_called_with( l2.get_output_shape_for.return_value) l2.get_output_shape_for.assert_called_with(shp) def test_get_output_shape_input_is_a_mapping(self, layers, get_output_shape): l1, l2, l3 = layers input_shapes = {l3: (4, 5, 6)} # expected: input_shapes[l3] assert get_output_shape(l3, input_shapes) is input_shapes[l3] # l3.get_output_shape_for, l2.get_output_shape_for should not have been # called assert l3.get_output_shape_for.call_count == 0 assert l2.get_output_shape_for.call_count == 0 def test_get_output_shape_input_is_a_mapping_no_key(self, layers, get_output_shape): l1, l2, l3 = layers output_shape = get_output_shape(l3, {}) # expected: l3.output_shape assert output_shape is l3.output_shape # l3.get_output_shape_for, l2.get_output_shape_for should not have been # called assert l3.get_output_shape_for.call_count == 0 assert l2.get_output_shape_for.call_count == 0 def test_get_output_shape_input_is_a_mapping_for_layer(self, layers, get_output_shape): l1, l2, l3 = layers shp = (4, 5, 6) input_shapes = {l2: shp} output_shape = get_output_shape(l3, input_shapes) # expected: l3.get_output_shape_for(shp) assert output_shape is l3.get_output_shape_for.return_value l3.get_output_shape_for.assert_called_with(shp) # l2.get_output_shape_for should not have been called assert l2.get_output_shape_for.call_count == 0 def test_get_output_shape_input_is_a_mapping_for_input_layer( self, layers, get_output_shape): l1, l2, l3 = layers shp = (4, 5, 6) input_shapes = {l1: shp} output_shape = get_output_shape(l3, input_shapes) # expected: l3.get_output_shape_for(l2.get_output_shape_for(shp)) assert output_shape is l3.get_output_shape_for.return_value l3.get_output_shape_for.assert_called_with( l2.get_output_shape_for.return_value) 
        l2.get_output_shape_for.assert_called_with(shp)

    @pytest.fixture
    def layer_from_shape(self):
        from lasagne.layers.base import Layer
        return Layer((None, 20))

    def test_layer_from_shape(self, layer_from_shape, get_output_shape):
        layer = layer_from_shape
        input_shapes = {layer: (4, 5, 6)}
        assert get_output_shape(layer, input_shapes) is input_shapes[layer]
        input_shapes = {None: (4, 5, 6)}
        layer.get_output_shape_for = Mock()
        assert (get_output_shape(layer, input_shapes) is
                layer.get_output_shape_for.return_value)
        layer.get_output_shape_for.assert_called_with(input_shapes[None])


class TestGetOutputShape_MergeLayer:
    @pytest.fixture
    def get_output_shape(self):
        from lasagne.layers.helper import get_output_shape
        return get_output_shape

    @pytest.fixture
    def layers(self):
        from lasagne.layers.base import Layer, MergeLayer
        from lasagne.layers.input import InputLayer
        # create two mocks of the same attributes as an InputLayer instance
        l1 = [Mock(InputLayer((None,)), output_shape=(None,)),
              Mock(InputLayer((None,)), output_shape=(None,))]
        # create two mocks of the same attributes as a Layer instance
        l2 = [Mock(Layer(l1[0]), output_shape=(None,)),
              Mock(Layer(l1[1]), output_shape=(None,))]
        # link them to the InputLayer mocks
        l2[0].input_layer = l1[0]
        l2[1].input_layer = l1[1]
        # create a mock that has the same attributes as a MergeLayer
        l3 = Mock(MergeLayer(l2))
        # link it to the two layer mocks, to get the following network:
        # l1[0] --> l2[0] --> l3
        # l1[1] --> l2[1] ----^
        l3.input_layers = l2
        return l1, l2, l3

    def test_get_output_shape_without_arguments(self, layers,
                                                get_output_shape):
        l1, l2, l3 = layers
        output_shape = get_output_shape(l3)
        # expected: l3.output_shape
        assert output_shape is l3.output_shape
        # l3.get_output_shape_for, l2[*].get_output_shape_for should not have
        # been called
        assert l3.get_output_shape_for.call_count == 0
        assert l2[0].get_output_shape_for.call_count == 0
        assert l2[1].get_output_shape_for.call_count == 0

    def test_get_output_shape_with_single_argument_fails(self, layers,
                                                         get_output_shape):
        l1, l2, l3 = layers
        shp = (4, 5, 6)
        # expected to fail: only gave one shape tuple for two input layers
        with pytest.raises(ValueError):
            output_shape = get_output_shape(l3, shp)

    def test_get_output_shape_input_is_a_mapping(self, layers,
                                                 get_output_shape):
        l1, l2, l3 = layers
        input_shapes = {l3: (4, 5, 6)}
        # expected: input_shapes[l3]
        assert get_output_shape(l3, input_shapes) is input_shapes[l3]
        # l3.get_output_shape_for, l2[*].get_output_shape_for should not have
        # been called
        assert l3.get_output_shape_for.call_count == 0
        assert l2[0].get_output_shape_for.call_count == 0
        assert l2[1].get_output_shape_for.call_count == 0

    def test_get_output_shape_input_is_a_mapping_no_key(self, layers,
                                                        get_output_shape):
        l1, l2, l3 = layers
        output_shape = get_output_shape(l3, {})
        # expected: l3.output_shape
        assert output_shape is l3.output_shape
        # l3.get_output_shape_for, l2[*].get_output_shape_for should not have
        # been called
        assert l3.get_output_shape_for.call_count == 0
        assert l2[0].get_output_shape_for.call_count == 0
        assert l2[1].get_output_shape_for.call_count == 0

    def test_get_output_shape_input_is_a_mapping_for_layer(self, layers,
                                                           get_output_shape):
        l1, l2, l3 = layers
        shp = (4, 5, 6)
        input_shapes = {l2[0]: shp}
        output = get_output_shape(l3, input_shapes)
        # expected: l3.get_output_shape_for(
        #     [shp, l2[1].get_output_shape_for(l1[1].shape)])
        assert output is l3.get_output_shape_for.return_value
        l3.get_output_shape_for.assert_called_with([
            shp, l2[1].get_output_shape_for.return_value])
l2[1].get_output_shape_for.assert_called_with(l1[1].shape) # l2[0].get_output_shape_for should not have been called assert l2[0].get_output_shape_for.call_count == 0 def test_get_output_shape_input_is_a_mapping_for_input_layer( self, layers, get_output_shape): l1, l2, l3 = layers shp = (4, 5, 6) input_shapes = {l1[0]: shp} output = get_output_shape(l3, input_shapes) # expected: l3.get_output_shape_for( # [l2[0].get_output_shape_for(shp), # l2[1].get_output_shape_for(l1[1].shape)]) assert output is l3.get_output_shape_for.return_value l3.get_output_shape_for.assert_called_with([ l2[0].get_output_shape_for.return_value, l2[1].get_output_shape_for.return_value, ]) l2[0].get_output_shape_for.assert_called_with(shp) l2[1].get_output_shape_for.assert_called_with(l1[1].shape) @pytest.fixture def layer_from_shape(self): from lasagne.layers.input import InputLayer from lasagne.layers.base import MergeLayer return MergeLayer([ (None, 20), Mock(InputLayer((None,)), output_shape=(None,))]) def test_layer_from_shape_valid_get_output_shape(self, layer_from_shape, get_output_shape): layer = layer_from_shape input_shapes = {layer: (4, 5, 6)} assert get_output_shape(layer, input_shapes) is input_shapes[layer] input_shapes = {None: (4, 5, 6)} layer.get_output_shape_for = Mock() assert (get_output_shape(layer, input_shapes) is layer.get_output_shape_for.return_value) layer.get_output_shape_for.assert_called_with( [input_shapes[None], layer.input_layers[1].shape]) class TestGetAllParams: def test_get_all_params(self): from lasagne.layers import (InputLayer, DenseLayer, get_all_params) l1 = InputLayer((10, 20)) l2 = DenseLayer(l1, 30) l3 = DenseLayer(l2, 40) assert get_all_params(l3) == l2.get_params() + l3.get_params() assert (get_all_params(l3, regularizable=False) == (l2.get_params(regularizable=False) + l3.get_params(regularizable=False))) assert (get_all_params(l3, regularizable=True) == (l2.get_params(regularizable=True) + l3.get_params(regularizable=True))) def test_get_all_params_with_unwrap_shared(self): from lasagne.layers import (InputLayer, DenseLayer, get_all_params) import theano.tensor as T from lasagne.utils import floatX l1 = InputLayer((10, 20)) l2 = DenseLayer(l1, 30) W1 = theano.shared(floatX(numpy.zeros((30, 2)))) W2 = theano.shared(floatX(numpy.zeros((2, 40)))) W_expr = T.dot(W1, W2) l3 = DenseLayer(l2, 40, W=W_expr, b=None) l2_params = get_all_params(l2) assert get_all_params(l3) == l2_params + [W1, W2] assert get_all_params(l3, unwrap_shared=False) == l2_params + [W_expr] class TestCountParams: def test_get_all_params(self): from lasagne.layers import (InputLayer, DenseLayer, count_params) l1 = InputLayer((10, 20)) l2 = DenseLayer(l1, 30) l3 = DenseLayer(l2, 40) num_weights = 20 * 30 + 30 * 40 num_biases = 30 + 40 assert count_params(l3, regularizable=True) == num_weights assert count_params(l3, regularizable=False) == num_biases assert count_params(l3) == num_weights + num_biases class TestGetAllParamValues: def test_get_all_param_values(self): from lasagne.layers import (InputLayer, DenseLayer, get_all_param_values) l1 = InputLayer((10, 20)) l2 = DenseLayer(l1, 30) l3 = DenseLayer(l2, 40) pvs = get_all_param_values(l3) assert len(pvs) == 4 class TestSetAllParamValues: def test_set_all_param_values(self): from lasagne.layers import (InputLayer, DenseLayer, set_all_param_values) from lasagne.utils import floatX l1 = InputLayer((10, 20)) l2 = DenseLayer(l1, 30) l3 = DenseLayer(l2, 40) a2 = floatX(numpy.random.normal(0, 1, (20, 30))) b2 = floatX(numpy.random.normal(0, 1, (30,))) a3 = 
floatX(numpy.random.normal(0, 1, (30, 40))) b3 = floatX(numpy.random.normal(0, 1, (40,))) set_all_param_values(l3, [a2, b2, a3, b3]) assert numpy.allclose(l3.W.get_value(), a3) assert numpy.allclose(l3.b.get_value(), b3) assert numpy.allclose(l2.W.get_value(), a2) assert numpy.allclose(l2.b.get_value(), b2) with pytest.raises(ValueError): set_all_param_values(l3, [a3, b3, a2]) with pytest.raises(ValueError): a3_bad = floatX(numpy.random.normal(0, 1, (25, 40))) set_all_param_values(l3, [a2, b2, a3_bad, b3]) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_dense.py0000644000175000017500000003154213307306052027646 0ustar sinclairssinclairsfrom mock import Mock import numpy as np import pytest import theano import lasagne class TestDenseLayer: @pytest.fixture def DenseLayer(self): from lasagne.layers.dense import DenseLayer return DenseLayer @pytest.fixture(params=(1, 2, -1)) def layer_vars(self, request, dummy_input_layer, DenseLayer): input_shape = dummy_input_layer.shape num_units = 5 num_leading_axes = request.param W_shape = (np.prod(input_shape[num_leading_axes:]), num_units) b_shape = (num_units,) W = Mock() b = Mock() nonlinearity = Mock() W.return_value = np.arange(np.prod(W_shape)).reshape(W_shape) b.return_value = np.arange(np.prod(b_shape)).reshape(b_shape) * 3 layer = DenseLayer( dummy_input_layer, num_units=num_units, num_leading_axes=num_leading_axes, W=W, b=b, nonlinearity=nonlinearity, ) return { 'input_shape': input_shape, 'num_units': num_units, 'num_leading_axes': num_leading_axes, 'W_shape': W_shape, 'b_shape': b_shape, 'W': W, 'b': b, 'nonlinearity': nonlinearity, 'layer': layer, } @pytest.fixture def layer(self, layer_vars): return layer_vars['layer'] def test_init(self, layer_vars): layer = layer_vars['layer'] assert (layer.W.get_value() == layer_vars['W'].return_value).all() assert (layer.b.get_value() == layer_vars['b'].return_value).all() layer_vars['W'].assert_called_with(layer_vars['W_shape']) layer_vars['b'].assert_called_with(layer_vars['b_shape']) def test_init_none_nonlinearity_bias(self, DenseLayer, dummy_input_layer): layer = DenseLayer( dummy_input_layer, num_units=3, nonlinearity=None, b=None, ) assert layer.nonlinearity == lasagne.nonlinearities.identity assert layer.b is None def test_wrong_num_leading_axes(self, DenseLayer, dummy_input_layer): with pytest.raises(ValueError) as exc: DenseLayer(dummy_input_layer, 5, num_leading_axes=3) assert "leaving no trailing axes" in exc.value.args[0] with pytest.raises(ValueError) as exc: DenseLayer(dummy_input_layer, 5, num_leading_axes=-4) assert "requesting more trailing axes" in exc.value.args[0] def test_variable_shape(self, DenseLayer): # should work: assert DenseLayer((None, 10), 20).output_shape == (None, 20) assert DenseLayer((10, None, 10), 20, num_leading_axes=2).output_shape == (10, None, 20) # should fail: for shape, num_leading_axes in ((10, None), 1), ((10, None, 10), 1): with pytest.raises(ValueError) as exc: DenseLayer(shape, 20, num_leading_axes=num_leading_axes) assert "requires a fixed input shape" in exc.value.args[0] def test_get_params(self, layer): assert layer.get_params() == [layer.W, layer.b] assert layer.get_params(regularizable=False) == [layer.b] assert layer.get_params(regularizable=True) == [layer.W] assert layer.get_params(trainable=True) == [layer.W, layer.b] assert layer.get_params(trainable=False) == [] assert layer.get_params(_nonexistent_tag=True) == [] assert layer.get_params(_nonexistent_tag=False) == [layer.W, layer.b] def 
test_get_output_shape_for(self, layer_vars): layer = layer_vars['layer'] num_units = layer_vars['num_units'] num_leading_axes = layer_vars['num_leading_axes'] for input_shape in ((5, 6, 7), (None, 2, 3), (None, None, None)): output_shape = input_shape[:num_leading_axes] + (num_units,) assert layer.get_output_shape_for(input_shape) == output_shape def test_get_output_for(self, layer_vars): layer = layer_vars['layer'] nonlinearity = layer_vars['nonlinearity'] num_leading_axes = layer_vars['num_leading_axes'] W = layer_vars['W']() b = layer_vars['b']() input = theano.shared(np.ones(layer_vars['input_shape'])) result = layer.get_output_for(input) assert result is nonlinearity.return_value # Check that the input to the nonlinearity was what we expect # from dense layer, i.e. the dot product plus bias nonlinearity_arg = nonlinearity.call_args[0][0] expected = input.get_value() expected = expected.reshape(expected.shape[:num_leading_axes] + (-1,)) expected = np.dot(expected, W) + b assert np.allclose(nonlinearity_arg.eval(), expected) def test_param_names(self, layer): assert layer.W.name == "W" assert layer.b.name == "b" def test_named_layer_param_names(self, DenseLayer, dummy_input_layer): layer = DenseLayer( dummy_input_layer, num_units=3, name="foo" ) assert layer.W.name == "foo.W" assert layer.b.name == "foo.b" class TestNINLayer: @pytest.fixture def dummy_input_layer(self): from lasagne.layers.input import InputLayer input_layer = InputLayer((2, 3, 4, 5)) mock = Mock(input_layer) mock.shape = input_layer.shape mock.input_var = input_layer.input_var mock.output_shape = input_layer.output_shape return mock @pytest.fixture def NINLayer(self): from lasagne.layers.dense import NINLayer return NINLayer @pytest.fixture def layer_vars(self, NINLayer, dummy_input_layer): W = Mock() b = Mock() nonlinearity = Mock() W.return_value = np.ones((3, 5)) b.return_value = np.ones((5,)) layer = NINLayer( dummy_input_layer, num_units=5, W=W, b=b, nonlinearity=nonlinearity, ) return { 'W': W, 'b': b, 'nonlinearity': nonlinearity, 'layer': layer, } @pytest.fixture def layer(self, layer_vars): return layer_vars['layer'] def test_init(self, layer_vars): layer = layer_vars['layer'] assert (layer.W.get_value() == layer_vars['W'].return_value).all() assert (layer.b.get_value() == layer_vars['b'].return_value).all() layer_vars['W'].assert_called_with((3, 5)) layer_vars['b'].assert_called_with((5,)) def test_init_none_nonlinearity_bias(self, NINLayer, dummy_input_layer): layer = NINLayer( dummy_input_layer, num_units=3, nonlinearity=None, b=None, ) assert layer.nonlinearity == lasagne.nonlinearities.identity assert layer.b is None def test_init_untie_biases(self, NINLayer, dummy_input_layer): layer = NINLayer( dummy_input_layer, num_units=5, untie_biases=True, ) assert (layer.b.shape.eval() == (5, 4, 5)).all() def test_get_params(self, layer): assert layer.get_params() == [layer.W, layer.b] assert layer.get_params(regularizable=False) == [layer.b] assert layer.get_params(regularizable=True) == [layer.W] assert layer.get_params(trainable=True) == [layer.W, layer.b] assert layer.get_params(trainable=False) == [] assert layer.get_params(_nonexistent_tag=True) == [] assert layer.get_params(_nonexistent_tag=False) == [layer.W, layer.b] def test_get_output_shape_for(self, layer): assert layer.get_output_shape_for((5, 6, 7, 8)) == (5, 5, 7, 8) @pytest.mark.parametrize("extra_kwargs", [ {}, {'untie_biases': True}, {'b': None}, ]) def test_get_output_for(self, dummy_input_layer, extra_kwargs): from lasagne.layers.dense import 
NINLayer nonlinearity = Mock() layer = NINLayer( dummy_input_layer, num_units=6, nonlinearity=nonlinearity, **extra_kwargs ) input = theano.shared(np.random.uniform(-1, 1, (2, 3, 4, 5))) result = layer.get_output_for(input) assert result is nonlinearity.return_value nonlinearity_arg = nonlinearity.call_args[0][0] X = input.get_value() X = np.rollaxis(X, 1).T X = np.dot(X, layer.W.get_value()) if layer.b is not None: if layer.untie_biases: X += layer.b.get_value()[:, np.newaxis].T else: X += layer.b.get_value() X = np.rollaxis(X.T, 0, 2) assert np.allclose(nonlinearity_arg.eval(), X) def test_param_names(self, layer): assert layer.W.name == "W" assert layer.b.name == "b" def test_named_layer_param_names(self, NINLayer, dummy_input_layer): layer = NINLayer( dummy_input_layer, num_units=3, name="foo" ) assert layer.W.name == "foo.W" assert layer.b.name == "foo.b" class TestNINLayer_c01b: @pytest.fixture def dummy_input_layer(self): from lasagne.layers.input import InputLayer input_layer = InputLayer((3, 4, 5, 2)) mock = Mock(input_layer) mock.shape = input_layer.shape mock.input_var = input_layer.input_var mock.output_shape = input_layer.output_shape return mock @pytest.fixture def NINLayer_c01b(self): try: from lasagne.layers.cuda_convnet import NINLayer_c01b except ImportError: pytest.skip("cuda_convnet not available") return NINLayer_c01b @pytest.fixture def layer_vars(self, NINLayer_c01b, dummy_input_layer): W = Mock() b = Mock() nonlinearity = Mock() W.return_value = np.ones((5, 3)) b.return_value = np.ones((5,)) layer = NINLayer_c01b( dummy_input_layer, num_units=5, W=W, b=b, nonlinearity=nonlinearity, ) return { 'W': W, 'b': b, 'nonlinearity': nonlinearity, 'layer': layer, } @pytest.fixture def layer(self, layer_vars): return layer_vars['layer'] def test_init(self, layer_vars): layer = layer_vars['layer'] assert (layer.W.get_value() == layer_vars['W'].return_value).all() assert (layer.b.get_value() == layer_vars['b'].return_value).all() layer_vars['W'].assert_called_with((5, 3)) layer_vars['b'].assert_called_with((5,)) def test_init_none_nonlinearity_bias(self, NINLayer_c01b, dummy_input_layer): layer = NINLayer_c01b( dummy_input_layer, num_units=3, nonlinearity=None, b=None, ) assert layer.nonlinearity == lasagne.nonlinearities.identity assert layer.b is None def test_init_untie_biases(self, NINLayer_c01b, dummy_input_layer): layer = NINLayer_c01b( dummy_input_layer, num_units=5, untie_biases=True, ) assert (layer.b.shape.eval() == (5, 4, 5)).all() def test_get_params(self, layer): assert layer.get_params() == [layer.W, layer.b] assert layer.get_params(regularizable=False) == [layer.b] assert layer.get_params(regularizable=True) == [layer.W] assert layer.get_params(trainable=True) == [layer.W, layer.b] assert layer.get_params(trainable=False) == [] assert layer.get_params(_nonexistent_tag=True) == [] assert layer.get_params(_nonexistent_tag=False) == [layer.W, layer.b] def test_get_output_shape_for(self, layer): assert layer.get_output_shape_for((6, 7, 8, 5)) == (5, 7, 8, 5) @pytest.mark.parametrize("extra_kwargs", [ {}, {'untie_biases': True}, {'b': None}, ]) def test_get_output_for(self, dummy_input_layer, NINLayer_c01b, extra_kwargs): nonlinearity = Mock() layer = NINLayer_c01b( dummy_input_layer, num_units=6, nonlinearity=nonlinearity, **extra_kwargs ) input = theano.shared(np.random.uniform(-1, 1, (3, 4, 5, 2))) result = layer.get_output_for(input) assert result is nonlinearity.return_value nonlinearity_arg = nonlinearity.call_args[0][0] X = input.get_value() W = 
layer.W.get_value() out = np.dot(W, X.reshape(X.shape[0], -1)) out = out.reshape(W.shape[0], X.shape[1], X.shape[2], X.shape[3]) if layer.b is not None: if layer.untie_biases: out += layer.b.get_value()[..., None] else: out += layer.b.get_value()[:, None, None, None] assert np.allclose(nonlinearity_arg.eval(), out) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_shape.py0000644000175000017500000002656713307306052027663 0ustar sinclairssinclairsimport numpy as np import pytest import theano from mock import Mock class TestFlattenLayer: @pytest.fixture def layer(self): from lasagne.layers.shape import FlattenLayer return FlattenLayer(Mock(output_shape=(None,))) @pytest.fixture def layer_outdim3(self): from lasagne.layers.shape import FlattenLayer return FlattenLayer(Mock(output_shape=(None,)), outdim=3) @pytest.fixture def layer_outdim1(self): from lasagne.layers.shape import FlattenLayer return FlattenLayer(Mock(output_shape=(None,)), outdim=1) def test_get_output_shape_for(self, layer): input_shape = (2, 3, 4, 5) assert layer.get_output_shape_for(input_shape) == (2, 3 * 4 * 5) def test_get_output_shape_for_contain_none(self, layer): input_shape = (2, 3, None, 5) assert layer.get_output_shape_for(input_shape) == (2, None) def test_get_output_for(self, layer): input = np.random.random((2, 3, 4, 5)) result = layer.get_output_for(theano.shared(input)).eval() assert (result == input.reshape((input.shape[0], -1))).all() def test_get_output_shape_for_outdim3(self, layer_outdim3): input_shape = (2, 3, 4, 5) assert layer_outdim3.get_output_shape_for(input_shape) == (2, 3, 4 * 5) def test_get_output_for_outdim3(self, layer_outdim3): input = np.random.random((2, 3, 4, 5)) result = layer_outdim3.get_output_for(theano.shared(input)).eval() assert (result == input.reshape( (input.shape[0], input.shape[1], -1))).all() def test_get_output_shape_for_outdim1(self, layer_outdim1): input_shape = (2, 3, 4, 5) assert layer_outdim1.get_output_shape_for(input_shape) == ( 2 * 3 * 4 * 5, ) def test_get_output_for_outdim1(self, layer_outdim1): input = np.random.random((2, 3, 4, 5)) result = layer_outdim1.get_output_for(theano.shared(input)).eval() assert (result == input.reshape(-1)).all() def test_dim0_raises(self): from lasagne.layers.shape import FlattenLayer with pytest.raises(ValueError): FlattenLayer((2, 3, 4), outdim=0) class TestPadLayer: @pytest.fixture def layerclass(self): from lasagne.layers.shape import PadLayer return PadLayer @pytest.mark.parametrize( "width, input_shape, output_shape", [(3, (2, 3, 4, 5), (2, 3, 10, 11)), ((2, 3), (2, 3, 4, 5), (2, 3, 8, 11)), (((1, 2), (3, 4)), (2, 3, 4, 5), (2, 3, 7, 12)), (3, (2, 3, None, 5), (2, 3, None, 11)), ((2, 3), (2, 3, 4, None), (2, 3, 8, None)), (((1, 2), (3, 4)), (None, 3, None, None), (None, 3, None, None)), ]) def test_get_output_shape_for(self, layerclass, width, input_shape, output_shape): layer = layerclass(Mock(output_shape=(None,)), width=width) assert layer.get_output_shape_for(input_shape) == output_shape def test_get_output_for(self, layerclass): layer = layerclass(Mock(output_shape=(None,)), width=2) input = np.zeros((1, 2, 10)) trimmed = theano.shared(input[:, :, 2:-2]) result = layer.get_output_for(trimmed).eval() assert (result == input).all() class TestReshapeLayer: @pytest.fixture def layerclass(self): from lasagne.layers.shape import ReshapeLayer return ReshapeLayer @pytest.fixture def two_unknown(self): from lasagne.layers.input import InputLayer shape = (16, 3, None, None, 10) return (InputLayer(shape), 
theano.shared(np.ones((16, 3, 5, 7, 10)))) def test_no_reference(self, layerclass, two_unknown): inputlayer, inputdata = two_unknown layer = layerclass(inputlayer, (16, 3, 5, 7, 2, 5)) assert layer.output_shape == (16, 3, 5, 7, 2, 5) result = layer.get_output_for(inputdata).eval() assert result.shape == (16, 3, 5, 7, 2, 5) def test_reference_both(self, layerclass, two_unknown): inputlayer, inputdata = two_unknown layer = layerclass(inputlayer, (-1, [1], [2], [3], 2, 5)) assert layer.output_shape == (16, 3, None, None, 2, 5) result = layer.get_output_for(inputdata).eval() assert result.shape == (16, 3, 5, 7, 2, 5) def test_reference_one(self, layerclass, two_unknown): inputlayer, inputdata = two_unknown layer = layerclass(inputlayer, (-1, [1], [2], 7, 2, 5)) assert layer.output_shape == (None, 3, None, 7, 2, 5) result = layer.get_output_for(inputdata).eval() assert result.shape == (16, 3, 5, 7, 2, 5) def test_reference_twice(self, layerclass, two_unknown): inputlayer, inputdata = two_unknown layer = layerclass(inputlayer, (-1, [1], [2], [3], 2, [2])) assert layer.output_shape == (None, 3, None, None, 2, None) result = layer.get_output_for(inputdata).eval() assert result.shape == (16, 3, 5, 7, 2, 5) def test_merge_with_unknown(self, layerclass, two_unknown): inputlayer, inputdata = two_unknown layer = layerclass(inputlayer, ([0], [1], [2], -1)) assert layer.output_shape == (16, 3, None, None) result = layer.get_output_for(inputdata).eval() assert result.shape == (16, 3, 5, 70) def test_merge_two_unknowns(self, layerclass, two_unknown): inputlayer, inputdata = two_unknown layer = layerclass(inputlayer, ([0], [1], -1, [4])) assert layer.output_shape == (16, 3, None, 10) result = layer.get_output_for(inputdata).eval() assert result.shape == (16, 3, 35, 10) def test_size_mismatch(self, layerclass, two_unknown): inputlayer, inputdata = two_unknown with pytest.raises(ValueError) as excinfo: layerclass(inputlayer, (17, 3, [2], [3], -1)) assert 'match' in str(excinfo.value) def test_invalid_spec(self, layerclass, two_unknown): inputlayer, inputdata = two_unknown with pytest.raises(ValueError): layerclass(inputlayer, (-16, 3, 5, 7, 10)) with pytest.raises(ValueError): layerclass(inputlayer, (-1, 3, 5, 7, -1)) with pytest.raises(ValueError): layerclass(inputlayer, ([-1], 3, 5, 7, 10)) with pytest.raises(ValueError): layerclass(inputlayer, ([0, 1], 3, 5, 7, 10)) with pytest.raises(ValueError): layerclass(inputlayer, (None, 3, 5, 7, 10)) with pytest.raises(ValueError): layerclass(inputlayer, (16, 3, 5, 7, [5])) with pytest.raises(ValueError): layerclass(inputlayer, (16, 3, theano.tensor.vector(), 7, 10)) def test_symbolic_shape(self): from lasagne.layers import InputLayer, ReshapeLayer, get_output x = theano.tensor.tensor3() batch_size, seq_len, num_features = x.shape l_inp = InputLayer((None, None, None)) l_rshp2 = ReshapeLayer(l_inp, (batch_size*seq_len, [2])) # we cannot infer any of the output shapes because they are symbolic. 
output_shape = l_rshp2.get_output_shape_for( (batch_size, seq_len, num_features)) assert output_shape == (None, None) output = get_output(l_rshp2, x) out1 = output.eval({x: np.ones((3, 5, 6), dtype='float32')}) out2 = output.eval({x: np.ones((4, 5, 7), dtype='float32')}) assert out1.shape == (3*5, 6) assert out2.shape == (4*5, 7) class TestDimshuffleLayer: @pytest.fixture def input_shape(self): return (2, 3, 1, 5, 7) @pytest.fixture def input_var(self): InputTensorType = theano.tensor.TensorType( 'float64', broadcastable=(False, False, True, False, False), name='DimShuffleTestTensor') return InputTensorType(name='x') @pytest.fixture def input_layer(self, input_shape, input_var): from lasagne.layers.input import InputLayer return InputLayer(input_shape, input_var) @pytest.fixture def input_shape_with_None(self): return (2, 3, None, 5, 7) @pytest.fixture def input_layer_with_None(self, input_shape_with_None, input_var): from lasagne.layers.input import InputLayer return InputLayer(input_shape_with_None, input_var) @pytest.fixture def input_data(self, input_shape): return np.ones(input_shape) def test_rearrange(self, input_data, input_var, input_layer): from lasagne.layers.shape import DimshuffleLayer ds = DimshuffleLayer(input_layer, [4, 3, 2, 1, 0]) assert ds.output_shape == (7, 5, 1, 3, 2) assert ds.get_output_for(input_var).eval( {input_var: input_data}).shape == (7, 5, 1, 3, 2) def test_broadcast(self, input_data, input_var, input_layer): from lasagne.layers.shape import DimshuffleLayer ds = DimshuffleLayer(input_layer, [0, 1, 2, 3, 4, 'x']) assert ds.output_shape == (2, 3, 1, 5, 7, 1) assert ds.get_output_for(input_var).eval( {input_var: input_data}).shape == (2, 3, 1, 5, 7, 1) def test_collapse(self, input_data, input_var, input_layer): from lasagne.layers.shape import DimshuffleLayer ds_ok = DimshuffleLayer(input_layer, [0, 1, 3, 4]) assert ds_ok.output_shape == (2, 3, 5, 7) assert ds_ok.get_output_for(input_var).eval( {input_var: input_data}).shape == (2, 3, 5, 7) with pytest.raises(ValueError): DimshuffleLayer(input_layer, [0, 1, 2, 4]) def test_collapse_None(self, input_data, input_var, input_layer_with_None): from lasagne.layers.shape import DimshuffleLayer ds_ok = DimshuffleLayer(input_layer_with_None, [0, 1, 3, 4]) assert ds_ok.output_shape == (2, 3, 5, 7) assert ds_ok.get_output_for(input_var).eval( {input_var: input_data}).shape == (2, 3, 5, 7) with pytest.raises(ValueError): DimshuffleLayer(input_layer_with_None, [0, 1, 2, 4]) def test_invalid_pattern(self, input_data, input_var, input_layer): from lasagne.layers.shape import DimshuffleLayer with pytest.raises(ValueError): DimshuffleLayer(input_layer, ['q']) with pytest.raises(ValueError): DimshuffleLayer(input_layer, [0, 0, 1, 3, 4]) with pytest.raises(ValueError): # There is no dimension 42 DimshuffleLayer(input_layer, [0, 1, 2, 4, 42]) def test_slice_layer(): from lasagne.layers import SliceLayer, InputLayer, get_output_shape,\ get_output from numpy.testing import assert_array_almost_equal as aeq in_shp = (3, 5, 2) l_inp = InputLayer(in_shp) l_slice_ax0 = SliceLayer(l_inp, axis=0, indices=0) l_slice_ax1 = SliceLayer(l_inp, axis=1, indices=slice(3, 5)) l_slice_ax2 = SliceLayer(l_inp, axis=-1, indices=-1) x = np.arange(np.prod(in_shp)).reshape(in_shp).astype('float32') x1 = x[0] x2 = x[:, 3:5] x3 = x[:, :, -1] assert get_output_shape(l_slice_ax0) == x1.shape assert get_output_shape(l_slice_ax1) == x2.shape assert get_output_shape(l_slice_ax2) == x3.shape aeq(get_output(l_slice_ax0, x).eval(), x1) aeq(get_output(l_slice_ax1, 
x).eval(), x2) aeq(get_output(l_slice_ax2, x).eval(), x3) # test slicing None dimension in_shp = (2, None, 2) l_inp = InputLayer(in_shp) l_slice_ax1 = SliceLayer(l_inp, axis=1, indices=slice(3, 5)) assert get_output_shape(l_slice_ax1) == (2, None, 2) aeq(get_output(l_slice_ax1, x).eval(), x2) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_noise.py0000644000175000017500000001525613307306052027671 0ustar sinclairssinclairsfrom mock import Mock import numpy import numpy as np from numpy.random import RandomState import theano import pytest from lasagne.random import get_rng, set_rng class TestDropoutLayer: @pytest.fixture(params=[(100, 100), (None, 100)]) def input_layer(self, request): from lasagne.layers.input import InputLayer return InputLayer(request.param) @pytest.fixture def layer(self, input_layer): from lasagne.layers.noise import DropoutLayer return DropoutLayer(input_layer) @pytest.fixture def layer_no_rescale(self, input_layer): from lasagne.layers.noise import DropoutLayer return DropoutLayer(input_layer, rescale=False) @pytest.fixture def layer_p_02(self, input_layer): from lasagne.layers.noise import DropoutLayer return DropoutLayer(input_layer, p=0.2) def test_get_output_for_non_deterministic(self, layer): input = theano.shared(numpy.ones((100, 100))) result = layer.get_output_for(input) result_eval = result.eval() assert 0.9 < result_eval.mean() < 1.1 assert (numpy.unique(result_eval) == [0., 2.]).all() def test_get_output_for_deterministic(self, layer): input = theano.shared(numpy.ones((100, 100))) result = layer.get_output_for(input, deterministic=True) result_eval = result.eval() assert (result_eval == input.get_value()).all() def test_get_output_for_no_rescale(self, layer_no_rescale): input = theano.shared(numpy.ones((100, 100))) result = layer_no_rescale.get_output_for(input) result_eval = result.eval() assert 0.4 < result_eval.mean() < 0.6 assert (numpy.unique(result_eval) == [0., 1.]).all() def test_get_output_for_no_rescale_dtype(self, layer_no_rescale): input = theano.shared(numpy.ones((100, 100), dtype=numpy.int32)) result = layer_no_rescale.get_output_for(input) assert result.dtype == input.dtype def test_get_output_for_p_02(self, layer_p_02): input = theano.shared(numpy.ones((100, 100))) result = layer_p_02.get_output_for(input) result_eval = result.eval() assert 0.9 < result_eval.mean() < 1.1 assert (numpy.round(numpy.unique(result_eval), 2) == [0., 1.25]).all() def test_get_output_for_p_float32(self, input_layer): from lasagne.layers.noise import DropoutLayer layer = DropoutLayer(input_layer, p=numpy.float32(0.5)) input = theano.shared(numpy.ones((100, 100), dtype=numpy.float32)) assert layer.get_output_for(input).dtype == input.dtype def test_get_output_for_p_float16(self, input_layer): from lasagne.layers.noise import DropoutLayer layer = DropoutLayer(input_layer, p=numpy.float16(0.5)) input = theano.shared(numpy.ones((100, 100), dtype=numpy.float16)) assert layer.get_output_for(input).dtype == input.dtype @pytest.mark.parametrize("shared_axes", [(), (0,), (2, 3), (-1, -2)]) def test_get_output_for_shared_axes(self, shared_axes): from lasagne.layers.noise import DropoutLayer layer = DropoutLayer((2, 4, 7, 9), shared_axes=shared_axes) input = theano.shared(numpy.ones((2, 4, 7, 9))) result = layer.get_output_for(input) result_eval = result.eval() # check if the dropout mask is the same across the specified axes: # compute the mean across these axes and compare against the full # output, broadcasting across the shared axes, to see if 
it matches assert np.allclose(result_eval.mean(axis=shared_axes, keepdims=True), result_eval) def test_specified_rng(self, input_layer): from lasagne.layers.noise import DropoutLayer input = theano.shared(numpy.ones((100, 100))) seed = 123456789 rng = get_rng() set_rng(RandomState(seed)) result = DropoutLayer(input_layer).get_output_for(input) result_eval1 = result.eval() set_rng(RandomState(seed)) result = DropoutLayer(input_layer).get_output_for(input) result_eval2 = result.eval() set_rng(rng) # reset to original RNG for other tests assert numpy.allclose(result_eval1, result_eval2) def test_dropout_convenience_functions(): from lasagne.layers.noise import (dropout_channels, spatial_dropout, dropout_locations) assert dropout_channels((10, 20)).shared_axes == () assert dropout_channels((None, None, None)).shared_axes == (2,) assert dropout_channels((1, 2, 3, 4)).shared_axes == (2, 3) assert dropout_channels((1, 2, 3, 4, 5, 6)).shared_axes == (2, 3, 4, 5) assert spatial_dropout((10, 20)).shared_axes == () assert spatial_dropout((None, None, None)).shared_axes == (2,) assert spatial_dropout((1, 2, 3, 4)).shared_axes == (2, 3) assert spatial_dropout((1, 2, 3, 4, 5, 6)).shared_axes == (2, 3, 4, 5) assert dropout_locations((10, 20)).shared_axes == (1,) assert dropout_locations((None, None, None)).shared_axes == (1,) assert dropout_locations((1, 2, 3, 4)).shared_axes == (1,) assert dropout_locations((1, 2, 3, 4, 5, 6)).shared_axes == (1,) class TestGaussianNoiseLayer: @pytest.fixture def layer(self): from lasagne.layers.noise import GaussianNoiseLayer return GaussianNoiseLayer(Mock(output_shape=(None,))) @pytest.fixture(params=[(100, 100), (None, 100)]) def input_layer(self, request): from lasagne.layers.input import InputLayer return InputLayer(request.param) def test_get_output_for_non_deterministic(self, layer): input = theano.shared(numpy.ones((100, 100))) result = layer.get_output_for(input, deterministic=False) result_eval = result.eval() assert (result_eval != input.eval()).all() assert result_eval.mean() != 1.0 assert numpy.round(result_eval.mean()) == 1.0 def test_get_output_for_deterministic(self, layer): input = theano.shared(numpy.ones((3, 3))) result = layer.get_output_for(input, deterministic=True) assert np.allclose(result.eval(), input.eval()) def test_specified_rng(self, input_layer): from lasagne.layers.noise import GaussianNoiseLayer input = theano.shared(numpy.ones((100, 100))) seed = 123456789 rng = get_rng() set_rng(RandomState(seed)) result = GaussianNoiseLayer(input_layer).get_output_for(input) result_eval1 = result.eval() set_rng(RandomState(seed)) result = GaussianNoiseLayer(input_layer).get_output_for(input) result_eval2 = result.eval() set_rng(rng) # reset to original RNG for other tests assert numpy.allclose(result_eval1, result_eval2) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_base.py0000644000175000017500000001464313307306052027465 0ustar sinclairssinclairsfrom mock import Mock import numpy import pytest import theano class TestLayer: @pytest.fixture def layer(self): from lasagne.layers.base import Layer return Layer(Mock(output_shape=(None,))) @pytest.fixture def named_layer(self): from lasagne.layers.base import Layer return Layer(Mock(output_shape=(None,)), name='layer_name') def test_input_shape(self, layer): assert layer.input_shape == layer.input_layer.output_shape def test_get_output_shape_for(self, layer): shape = Mock() assert layer.get_output_shape_for(shape) == shape @pytest.fixture def layer_from_shape(self): from 
lasagne.layers.base import Layer return Layer((None, 20)) def test_layer_from_shape(self, layer_from_shape): layer = layer_from_shape assert layer.input_layer is None assert layer.input_shape == (None, 20) def test_named_layer(self, named_layer): assert named_layer.name == 'layer_name' def test_get_params(self, layer): assert layer.get_params() == [] def test_get_params_tags(self, layer): a_shape = (20, 50) a = numpy.random.normal(0, 1, a_shape) A = layer.add_param(a, a_shape, name='A', tag1=True, tag2=False) b_shape = (30, 20) b = numpy.random.normal(0, 1, b_shape) B = layer.add_param(b, b_shape, name='B', tag1=True, tag2=True) c_shape = (40, 10) c = numpy.random.normal(0, 1, c_shape) C = layer.add_param(c, c_shape, name='C', tag2=True) assert layer.get_params() == [A, B, C] assert layer.get_params(tag1=True) == [A, B] assert layer.get_params(tag1=False) == [C] assert layer.get_params(tag2=True) == [B, C] assert layer.get_params(tag2=False) == [A] assert layer.get_params(tag1=True, tag2=True) == [B] def test_get_params_expressions(self, layer): x, y, z = (theano.shared(0, name=n) for n in 'xyz') W1 = layer.add_param(x**2 + theano.tensor.log(y), (), tag1=True) W2 = layer.add_param(theano.tensor.matrix(), (10, 10), tag1=True) W3 = layer.add_param(z.T, (), tag2=True) # layer.params stores the parameter expressions: assert list(layer.params.keys()) == [W1, W2, W3] # layer.get_params() returns the underlying shared variables: assert layer.get_params() == [x, y, z] # filtering acts on the parameter expressions: assert layer.get_params(tag1=True) == [x, y] assert layer.get_params(tag2=True) == [z] def test_add_param_tags(self, layer): a_shape = (20, 50) a = numpy.random.normal(0, 1, a_shape) A = layer.add_param(a, a_shape) assert A in layer.params assert 'trainable' in layer.params[A] assert 'regularizable' in layer.params[A] b_shape = (30, 20) b = numpy.random.normal(0, 1, b_shape) B = layer.add_param(b, b_shape, trainable=False) assert B in layer.params assert 'trainable' not in layer.params[B] assert 'regularizable' in layer.params[B] c_shape = (40, 10) c = numpy.random.normal(0, 1, c_shape) C = layer.add_param(c, c_shape, tag1=True) assert C in layer.params assert 'trainable' in layer.params[C] assert 'regularizable' in layer.params[C] assert 'tag1' in layer.params[C] def test_add_param_name(self, layer): a_shape = (20, 50) a = numpy.random.normal(0, 1, a_shape) A = layer.add_param(a, a_shape, name='A') assert A.name == 'A' def test_add_param_named_layer_name(self, named_layer): a_shape = (20, 50) a = numpy.random.normal(0, 1, a_shape) A = named_layer.add_param(a, a_shape, name='A') assert A.name == 'layer_name.A' def test_get_output_for_notimplemented(self, layer): with pytest.raises(NotImplementedError): layer.get_output_for(Mock()) def test_nonpositive_input_dims_raises_value_error(self, layer): from lasagne.layers.base import Layer neg_input_layer = Mock(output_shape=(None, -1, -1)) zero_input_layer = Mock(output_shape=(None, 0, 0)) pos_input_layer = Mock(output_shape=(None, 1, 1)) with pytest.raises(ValueError): Layer(neg_input_layer) with pytest.raises(ValueError): Layer(zero_input_layer) Layer(pos_input_layer) def test_symbolic_output_shape(self): from lasagne.layers.base import Layer class WrongLayer(Layer): def get_output_shape_for(self, input_shape): return theano.tensor.vector().shape with pytest.raises(ValueError) as exc: WrongLayer((None,)).output_shape assert "symbolic output shape" in exc.value.args[0] class TestMergeLayer: @pytest.fixture def layer(self): from 
lasagne.layers.base import MergeLayer return MergeLayer([Mock(), Mock()]) def test_input_shapes(self, layer): assert layer.input_shapes == [l.output_shape for l in layer.input_layers] @pytest.fixture def layer_from_shape(self): from lasagne.layers.input import InputLayer from lasagne.layers.base import MergeLayer return MergeLayer( [(None, 20), Mock(InputLayer((None,)), output_shape=(None,))] ) def test_layer_from_shape(self, layer_from_shape): layer = layer_from_shape assert layer.input_layers[0] is None assert layer.input_shapes[0] == (None, 20) assert layer.input_layers[1] is not None assert (layer.input_shapes[1] == layer.input_layers[1].output_shape) def test_get_params(self, layer): assert layer.get_params() == [] def test_get_output_shape_for_notimplemented(self, layer): with pytest.raises(NotImplementedError): layer.get_output_shape_for(Mock()) def test_get_output_for_notimplemented(self, layer): with pytest.raises(NotImplementedError): layer.get_output_for(Mock()) def test_symbolic_output_shape(self): from lasagne.layers.base import MergeLayer class WrongLayer(MergeLayer): def get_output_shape_for(self, input_shapes): return theano.tensor.vector().shape with pytest.raises(ValueError) as exc: WrongLayer([(None,)]).output_shape assert "symbolic output shape" in exc.value.args[0] Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_normalization.py0000644000175000017500000006114213307306052031435 0ustar sinclairssinclairs# -*- coding: utf-8 -*- """ The :func:`ground_truth_normalizer()`, :func:`ground_truth_normalize_row` and :class:`TestLocalResponseNormalization2DLayer` implementations contain code from `pylearn2 `_, which is covered by the following license: Copyright (c) 2011--2014, Université de Montréal All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
""" from mock import Mock import numpy as np import pytest import theano def ground_truth_normalizer(c01b, k, n, alpha, beta): out = np.zeros(c01b.shape) for r in range(out.shape[1]): for c in range(out.shape[2]): for x in range(out.shape[3]): out[:, r, c, x] = ground_truth_normalize_row( row=c01b[:, r, c, x], k=k, n=n, alpha=alpha, beta=beta) return out def ground_truth_normalize_row(row, k, n, alpha, beta): assert row.ndim == 1 out = np.zeros(row.shape) for i in range(row.shape[0]): s = k tot = 0 for j in range(max(0, i-n//2), min(row.shape[0], i+n//2+1)): tot += 1 sq = row[j] ** 2. assert sq > 0. assert s >= k assert alpha > 0. s += alpha * sq assert s >= k assert tot <= n assert s >= k s = s ** beta out[i] = row[i] / s return out class TestLocalResponseNormalization2DLayer: @pytest.fixture def rng(self): return np.random.RandomState([2013, 2]) @pytest.fixture def input_data(self, rng): channels = 15 rows = 3 cols = 4 batch_size = 2 shape = (batch_size, channels, rows, cols) return rng.randn(*shape).astype(theano.config.floatX) @pytest.fixture def input_layer(self, input_data): from lasagne.layers.input import InputLayer shape = list(input_data.shape) shape[0] = None return InputLayer(shape) @pytest.fixture def layer(self, input_layer): from lasagne.layers.normalization import\ LocalResponseNormalization2DLayer layer = LocalResponseNormalization2DLayer(input_layer, alpha=1.5, k=2, beta=0.75, n=5) return layer def test_get_params(self, layer): assert layer.get_params() == [] def test_get_output_shape_for(self, layer): assert layer.get_output_shape_for((1, 2, 3, 4)) == (1, 2, 3, 4) def test_even_n_fails(self, input_layer): from lasagne.layers.normalization import\ LocalResponseNormalization2DLayer with pytest.raises(NotImplementedError): LocalResponseNormalization2DLayer(input_layer, n=4) def test_normalization(self, input_data, input_layer, layer): from lasagne.layers import get_output X = input_layer.input_var lrn = theano.function([X], get_output(layer, X)) out = lrn(input_data) # ground_truth_normalizer assumes c01b input_data_c01b = input_data.transpose([1, 2, 3, 0]) ground_out = ground_truth_normalizer(input_data_c01b, n=layer.n, k=layer.k, alpha=layer.alpha, beta=layer.beta) ground_out = np.transpose(ground_out, [3, 0, 1, 2]) assert out.shape == ground_out.shape assert np.allclose(out, ground_out) class TestBatchNormLayer: @pytest.fixture(params=(False, True), ids=('plain', 'dnn')) def BatchNormLayer(self, request): dnn = request.param if not dnn: from lasagne.layers.normalization import BatchNormLayer elif dnn: try: from lasagne.layers.dnn import ( BatchNormDNNLayer as BatchNormLayer) except ImportError: pytest.skip("cuDNN batch norm not available") return BatchNormLayer @pytest.fixture def init_unique(self): # initializer for a tensor of unique values return lambda shape: np.arange(np.prod(shape)).reshape(shape) def test_init(self, BatchNormLayer, init_unique): input_shape = (2, 3, 4) # default: normalize over all but second axis beta = BatchNormLayer(input_shape, beta=init_unique).beta assert np.allclose(beta.get_value(), init_unique((3,))) # normalize over first axis only beta = BatchNormLayer(input_shape, beta=init_unique, axes=0).beta assert np.allclose(beta.get_value(), init_unique((3, 4))) # normalize over second and third axis try: beta = BatchNormLayer( input_shape, beta=init_unique, axes=(1, 2)).beta assert np.allclose(beta.get_value(), init_unique((2,))) except ValueError as exc: assert "BatchNormDNNLayer only supports" in exc.args[0] 
@pytest.mark.parametrize('update_averages', [None, True, False]) @pytest.mark.parametrize('use_averages', [None, True, False]) @pytest.mark.parametrize('deterministic', [True, False]) def test_get_output_for(self, BatchNormLayer, deterministic, use_averages, update_averages): input_shape = (20, 30, 40) # random input tensor, beta, gamma, mean, inv_std and alpha input = (np.random.randn(*input_shape).astype(theano.config.floatX) + np.random.randn(1, 30, 1).astype(theano.config.floatX)) beta = np.random.randn(30).astype(theano.config.floatX) gamma = np.random.randn(30).astype(theano.config.floatX) mean = np.random.randn(30).astype(theano.config.floatX) inv_std = np.random.rand(30).astype(theano.config.floatX) alpha = np.random.rand() # create layer (with default axes: normalize over all but second axis) layer = BatchNormLayer(input_shape, beta=beta, gamma=gamma, mean=mean, inv_std=inv_std, alpha=alpha) # call get_output_for() kwargs = {'deterministic': deterministic} if use_averages is not None: kwargs['batch_norm_use_averages'] = use_averages else: use_averages = deterministic if update_averages is not None: kwargs['batch_norm_update_averages'] = update_averages else: update_averages = not deterministic result = layer.get_output_for(theano.tensor.constant(input), **kwargs).eval() # compute expected results and expected updated parameters input_mean = input.mean(axis=(0, 2)) input_inv_std = 1 / np.sqrt(input.var(axis=(0, 2)) + layer.epsilon) if use_averages: use_mean, use_inv_std = mean, inv_std else: use_mean, use_inv_std = input_mean, input_inv_std bcast = (np.newaxis, slice(None), np.newaxis) exp_result = (input - use_mean[bcast]) * use_inv_std[bcast] exp_result = exp_result * gamma[bcast] + beta[bcast] if update_averages: new_mean = (1 - alpha) * mean + alpha * input_mean new_inv_std = (1 - alpha) * inv_std + alpha * input_inv_std else: new_mean, new_inv_std = mean, inv_std # compare expected results to actual results tol = {'atol': 1e-5, 'rtol': 1e-6} assert np.allclose(layer.mean.get_value(), new_mean, **tol) assert np.allclose(layer.inv_std.get_value(), new_inv_std, **tol) assert np.allclose(result, exp_result, **tol) def test_undefined_shape(self, BatchNormLayer): # should work: BatchNormLayer((64, 2, None), axes=(0, 2)) # should not work: with pytest.raises(ValueError) as exc: BatchNormLayer((64, None, 3), axes=(0, 2)) assert 'needs specified input sizes' in exc.value.args[0] def test_skip_linear_transform(self, BatchNormLayer): input_shape = (20, 30, 40) # random input tensor, beta, gamma input = (np.random.randn(*input_shape).astype(theano.config.floatX) + np.random.randn(1, 30, 1).astype(theano.config.floatX)) beta = np.random.randn(30).astype(theano.config.floatX) gamma = np.random.randn(30).astype(theano.config.floatX) # create layers without beta or gamma layer1 = BatchNormLayer(input_shape, beta=None, gamma=gamma) layer2 = BatchNormLayer(input_shape, beta=beta, gamma=None) # check that one parameter is missing assert len(layer1.get_params()) == 3 assert len(layer2.get_params()) == 3 # call get_output_for() result1 = layer1.get_output_for(theano.tensor.constant(input), deterministic=False).eval() result2 = layer2.get_output_for(theano.tensor.constant(input), deterministic=False).eval() # compute expected results and expected updated parameters mean = input.mean(axis=(0, 2)) std = np.sqrt(input.var(axis=(0, 2)) + layer1.epsilon) exp_result = (input - mean[None, :, None]) / std[None, :, None] exp_result1 = exp_result * gamma[None, :, None] # no beta exp_result2 = exp_result + 
beta[None, :, None] # no gamma # compare expected results to actual results tol = {'atol': 1e-5, 'rtol': 1e-6} assert np.allclose(result1, exp_result1, **tol) assert np.allclose(result2, exp_result2, **tol) def test_batch_norm_tag(self, BatchNormLayer): input_shape = (20, 30, 40) layer = BatchNormLayer(input_shape) assert len(layer.get_params()) == 4 stat_params = layer.get_params(batch_norm_stat=True) assert len(stat_params) == 2 param_names = [p.name for p in stat_params] assert "mean" in param_names assert "inv_std" in param_names @pytest.mark.parametrize('dnn', [False, True]) def test_batch_norm_macro(dnn): if not dnn: from lasagne.layers import (BatchNormLayer, batch_norm) else: try: from lasagne.layers.dnn import ( BatchNormDNNLayer as BatchNormLayer, batch_norm_dnn as batch_norm) except ImportError: pytest.skip("cuDNN batch norm not available") from lasagne.layers import (Layer, NonlinearityLayer) from lasagne.nonlinearities import identity input_shape = (2, 3) obj = object() # check if it steals the nonlinearity layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) bnstack = batch_norm(layer) assert isinstance(bnstack, NonlinearityLayer) assert isinstance(bnstack.input_layer, BatchNormLayer) assert layer.nonlinearity is identity assert bnstack.nonlinearity is obj # check if it removes the bias layer = Mock(Layer, output_shape=input_shape, b=obj, params={obj: set()}) bnstack = batch_norm(layer) assert isinstance(bnstack, BatchNormLayer) assert layer.b is None assert obj not in layer.params # check if it can handle an unset bias layer = Mock(Layer, output_shape=input_shape, b=None, params={obj: set()}) bnstack = batch_norm(layer) assert isinstance(bnstack, BatchNormLayer) assert layer.b is None # check if it passes on kwargs layer = Mock(Layer, output_shape=input_shape) bnstack = batch_norm(layer, name='foo') assert isinstance(bnstack, BatchNormLayer) assert bnstack.name == 'foo' # check if created layers are named with kwargs name layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) layer.name = 'foo' bnstack = batch_norm(layer, name='foo_bnorm') assert isinstance(bnstack, NonlinearityLayer) assert isinstance(bnstack.input_layer, BatchNormLayer) assert bnstack.name == 'foo_bnorm_nonlin' assert bnstack.input_layer.name == 'foo_bnorm' # check if created layers are named with wrapped layer name layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) layer.name = 'foo' bnstack = batch_norm(layer) assert isinstance(bnstack, NonlinearityLayer) assert isinstance(bnstack.input_layer, BatchNormLayer) assert bnstack.name == 'foo_bn_nonlin' assert bnstack.input_layer.name == 'foo_bn' # check if created layers remain unnamed if no names are given layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) bnstack = batch_norm(layer) assert isinstance(bnstack, NonlinearityLayer) assert isinstance(bnstack.input_layer, BatchNormLayer) assert bnstack.name is None assert bnstack.input_layer.name is None class TestStandardizationLayer: @pytest.fixture def layer(self): from lasagne.layers.normalization import StandardizationLayer input_shape = (2, 3, 4) layer = StandardizationLayer(input_shape) return layer def test_get_params(self, layer): assert layer.get_params() == [] def test_get_output_shape_for(self, layer): assert layer.get_output_shape_for((1, 2, 3, 4)) == (1, 2, 3, 4) @pytest.mark.parametrize('axes', ['auto', 'features', 'spatial', (1,), 1]) @pytest.mark.parametrize('input_shape', [(5, 10), (5, 10, 15), (5, 10, 15, 15), (5, 10, 15, 15, 15)]) def 
test_get_output_for(self, axes, input_shape): rand_shape = [1]*len(input_shape) rand_shape[1] = 10 # random input tensor input = (np.random.randn(*input_shape).astype(theano.config.floatX) + np.random.randn(*rand_shape).astype(theano.config.floatX)) # create layer from lasagne.layers.normalization import StandardizationLayer layer = StandardizationLayer(input_shape, axes=axes) def get_exp_result(axis): input_mean = input.mean(axis=axis, keepdims=True) input_std = np.sqrt(input.var(axis=axis, keepdims=True) + layer.epsilon) return (input - input_mean) / input_std # choose normalization axes according to specification if axes == 'auto' and len(input_shape) > 2: axes = tuple(range(2, len(input_shape))) elif axes == 'auto' and len(input_shape) == 2: axes = (1,) elif axes == 'spatial': axes = tuple(range(2, len(input_shape))) elif axes == 'features': axes = tuple(range(1, len(input_shape))) else: axes = axes exp_result = get_exp_result(axes) kwargs = {'deterministic': True} result = layer.get_output_for(theano.tensor.constant(input), **kwargs).eval() # compare expected results to actual results tol = {'atol': 1e-5, 'rtol': 1e-6} assert np.allclose(result, exp_result, **tol) @pytest.mark.parametrize('learn_bias', [True, False]) @pytest.mark.parametrize('learn_scale', [True, False]) def test_instance_norm_macro(learn_bias, learn_scale): from lasagne.layers import (Layer, NonlinearityLayer, StandardizationLayer, ScaleLayer, BiasLayer, instance_norm) from lasagne.nonlinearities import identity input_shape = (2, 3, 4, 4) obj = object() # check if it steals the nonlinearity layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) instack = instance_norm(layer, learn_bias=learn_bias, learn_scale=learn_scale) assert isinstance(instack, NonlinearityLayer) assert layer.nonlinearity is identity assert instack.nonlinearity is obj # check if layers are set according to specification if learn_bias: assert isinstance(instack.input_layer, BiasLayer) if learn_scale: assert isinstance(instack.input_layer.input_layer, ScaleLayer) assert isinstance(instack.input_layer.input_layer.input_layer, StandardizationLayer) else: assert isinstance(instack.input_layer.input_layer, StandardizationLayer) elif learn_scale: assert isinstance(instack.input_layer, ScaleLayer) assert isinstance(instack.input_layer.input_layer, StandardizationLayer) else: assert isinstance(instack.input_layer, StandardizationLayer) # check if it removes the bias layer = Mock(Layer, output_shape=input_shape, b=obj, params={obj: set()}) instack = instance_norm(layer) assert layer.b is None assert obj not in layer.params # check if it can handle an unset bias layer = Mock(Layer, output_shape=input_shape, b=None, params={obj: set()}) instack = instance_norm(layer) assert layer.b is None def test_instance_norm_macro_kwargs(): from lasagne.layers import (Layer, NonlinearityLayer, StandardizationLayer, ScaleLayer, BiasLayer, instance_norm) from lasagne.nonlinearities import identity input_shape = (2, 3, 4, 4) obj = object() # check if it passes on kwargs layer = Mock(Layer, output_shape=input_shape) instack = instance_norm(layer, learn_bias=False, learn_scale=False, name='foo') assert isinstance(instack, StandardizationLayer) assert instack.name == 'foo' # check if created layers are named with kwargs name layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) layer.name = 'foo' instack = instance_norm(layer, name='foo_inorm') assert isinstance(instack, NonlinearityLayer) assert instack.name == 'foo_inorm_nonlin' assert 
isinstance(instack.input_layer, BiasLayer) assert instack.input_layer.name == 'foo_inorm_bias' assert isinstance(instack.input_layer.input_layer, ScaleLayer) assert instack.input_layer.input_layer.name == 'foo_inorm_scale' assert isinstance(instack.input_layer.input_layer.input_layer, StandardizationLayer) assert instack.input_layer.input_layer.input_layer.name == 'foo_inorm' # check if created layers are named with wrapped layer name layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) layer.name = 'foo' instack = instance_norm(layer) assert isinstance(instack, NonlinearityLayer) assert instack.name == 'foo_in_nonlin' assert isinstance(instack.input_layer, BiasLayer) assert instack.input_layer.name == 'foo_in_bias' assert isinstance(instack.input_layer.input_layer, ScaleLayer) assert instack.input_layer.input_layer.name == 'foo_in_scale' assert isinstance(instack.input_layer.input_layer.input_layer, StandardizationLayer) assert instack.input_layer.input_layer.input_layer.name == 'foo_in' # check if created layers remain unnamed if no names are given layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) instack = instance_norm(layer) assert isinstance(instack, NonlinearityLayer) assert instack.name is None assert isinstance(instack.input_layer, BiasLayer) assert instack.input_layer.name is None assert isinstance(instack.input_layer.input_layer, ScaleLayer) assert instack.input_layer.input_layer.name is None assert isinstance(instack.input_layer.input_layer.input_layer, StandardizationLayer) assert instack.input_layer.input_layer.input_layer.name is None def test_layer_norm_macro(): from lasagne.layers import (Layer, NonlinearityLayer, StandardizationLayer, ScaleLayer, BiasLayer, layer_norm) from lasagne.nonlinearities import identity input_shape = (2, 3) obj = object() # check if it steals the nonlinearity and applies StandardizationLayer, # ScaleLayer and BiasLayer on top of the input layer layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) lnstack = layer_norm(layer) assert isinstance(lnstack, NonlinearityLayer) assert isinstance(lnstack.input_layer, BiasLayer) assert isinstance(lnstack.input_layer.input_layer, ScaleLayer) assert isinstance(lnstack.input_layer.input_layer.input_layer, StandardizationLayer) assert layer.nonlinearity is identity assert lnstack.nonlinearity is obj # check if it removes the bias layer = Mock(Layer, output_shape=input_shape, b=obj, params={obj: set()}) lnstack = layer_norm(layer) assert layer.b is None assert obj not in layer.params # check if it can handle an unset bias layer = Mock(Layer, output_shape=input_shape, b=None, params={obj: set()}) lnstack = layer_norm(layer) assert layer.b is None # check if it passes on kwargs layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) lnstack = layer_norm(layer, name='foo_lnorm') assert isinstance(lnstack, NonlinearityLayer) assert lnstack.name == 'foo_lnorm_nonlin' assert isinstance(lnstack.input_layer, BiasLayer) assert lnstack.input_layer.name == 'foo_lnorm_bias' assert isinstance(lnstack.input_layer.input_layer, ScaleLayer) assert lnstack.input_layer.input_layer.name == 'foo_lnorm_scale' assert isinstance(lnstack.input_layer.input_layer.input_layer, StandardizationLayer) assert lnstack.input_layer.input_layer.input_layer.name == 'foo_lnorm' # check if created layers are named with kwargs name layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) layer.name = 'foo' lnstack = layer_norm(layer, name='foo_lnorm') assert isinstance(lnstack, NonlinearityLayer) 
assert lnstack.name == 'foo_lnorm_nonlin' assert isinstance(lnstack.input_layer, BiasLayer) assert lnstack.input_layer.name == 'foo_lnorm_bias' assert isinstance(lnstack.input_layer.input_layer, ScaleLayer) assert lnstack.input_layer.input_layer.name == 'foo_lnorm_scale' assert isinstance(lnstack.input_layer.input_layer.input_layer, StandardizationLayer) assert lnstack.input_layer.input_layer.input_layer.name == 'foo_lnorm' # check if created layers are named with wrapped layer name layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) layer.name = 'foo' lnstack = layer_norm(layer) assert isinstance(lnstack, NonlinearityLayer) assert lnstack.name == 'foo_ln_nonlin' assert isinstance(lnstack.input_layer, BiasLayer) assert lnstack.input_layer.name == 'foo_ln_bias' assert isinstance(lnstack.input_layer.input_layer, ScaleLayer) assert lnstack.input_layer.input_layer.name == 'foo_ln_scale' assert isinstance(lnstack.input_layer.input_layer.input_layer, StandardizationLayer) assert lnstack.input_layer.input_layer.input_layer.name == 'foo_ln' # check if created layers remain unnamed if no names are given layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj) lnstack = layer_norm(layer) assert isinstance(lnstack, NonlinearityLayer) assert lnstack.name is None assert isinstance(lnstack.input_layer, BiasLayer) assert lnstack.input_layer.name is None assert isinstance(lnstack.input_layer.input_layer, ScaleLayer) assert lnstack.input_layer.input_layer.name is None assert isinstance(lnstack.input_layer.input_layer.input_layer, StandardizationLayer) assert lnstack.input_layer.input_layer.input_layer.name is None Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/conftest.py0000644000175000017500000000052313307306052027331 0ustar sinclairssinclairsfrom mock import Mock import pytest @pytest.fixture def dummy_input_layer(): from lasagne.layers.input import InputLayer input_layer = InputLayer((2, 3, 4)) mock = Mock(input_layer) mock.shape = input_layer.shape mock.input_var = input_layer.input_var mock.output_shape = input_layer.output_shape return mock Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_pool.py0000644000175000017500000013701413307306052027522 0ustar sinclairssinclairsfrom mock import Mock import numpy as np import pytest import theano from lasagne.utils import floatX def max_pool_1d(data, pool_size, stride=None): stride = pool_size if stride is None else stride idx = range(data.shape[-1]) used_idx = set([]) idx_sets = [] i = 0 while i < data.shape[-1]: idx_set = set(range(i, i + pool_size)) idx_set = idx_set.intersection(idx) if not idx_set.issubset(used_idx): idx_sets.append(list(idx_set)) used_idx = used_idx.union(idx_set) i += stride data_pooled = np.array( [data[..., idx_set].max(axis=-1) for idx_set in idx_sets]) data_pooled = np.rollaxis(data_pooled, 0, len(data_pooled.shape)) return data_pooled def max_pool_1d_ignoreborder(data, pool_size, stride=None, pad=0): stride = pool_size if stride is None else stride pads = [(0, 0), ] * len(data.shape) pads[-1] = (pad, pad) data = np.pad(data, pads, mode='constant', constant_values=(-np.inf,)) data_shifted = np.zeros((pool_size,) + data.shape) data_shifted = data_shifted[..., :data.shape[-1] - pool_size + 1] for i in range(pool_size): data_shifted[i] = data[..., i:i + data.shape[-1] - pool_size + 1] data_pooled = data_shifted.max(axis=0) if stride: data_pooled = data_pooled[..., ::stride] return data_pooled def upscale_1d_shape(shape, scale_factor): return (shape[0], shape[1], 
shape[2] * scale_factor[0]) def upscale_1d(data, scale_factor): upscaled = np.zeros(upscale_1d_shape(data.shape, scale_factor)) for i in range(scale_factor[0]): upscaled[:, :, i::scale_factor[0]] = data return upscaled def upscale_1d_dilate(data, scale_factor): upscaled = np.zeros(upscale_1d_shape(data.shape, scale_factor)) upscaled[:, :, ::scale_factor[0]] = data return upscaled def max_pool_2d(data, pool_size, stride): data_pooled = max_pool_1d(data, pool_size[1], stride[1]) data_pooled = np.swapaxes(data_pooled, -1, -2) data_pooled = max_pool_1d(data_pooled, pool_size[0], stride[0]) data_pooled = np.swapaxes(data_pooled, -1, -2) return data_pooled def max_pool_2d_ignoreborder(data, pool_size, stride, pad): data_pooled = max_pool_1d_ignoreborder( data, pool_size[1], stride[1], pad[1]) data_pooled = np.swapaxes(data_pooled, -1, -2) data_pooled = max_pool_1d_ignoreborder( data_pooled, pool_size[0], stride[0], pad[0]) data_pooled = np.swapaxes(data_pooled, -1, -2) return data_pooled def max_pool_3d_ignoreborder(data, pool_size, stride, pad): # Pool last dim data_pooled = max_pool_1d_ignoreborder( data, pool_size[2], stride[2], pad[2]) # Swap second to last to back and pool it data_pooled = np.swapaxes(data_pooled, -1, -2) data_pooled = max_pool_1d_ignoreborder( data_pooled, pool_size[1], stride[1], pad[1]) # Swap third to last and pool data_pooled = np.swapaxes(data_pooled, -1, -3) data_pooled = max_pool_1d_ignoreborder( data_pooled, pool_size[0], stride[0], pad[0]) # Bring back in order data_pooled = np.swapaxes(data_pooled, -1, -2) data_pooled = np.swapaxes(data_pooled, -2, -3) return data_pooled def upscale_2d_shape(shape, scale_factor): return (shape[0], shape[1], shape[2] * scale_factor[0], shape[3] * scale_factor[1]) def upscale_2d(data, scale_factor): upscaled = np.zeros(upscale_2d_shape(data.shape, scale_factor)) for j in range(scale_factor[0]): for i in range(scale_factor[1]): upscaled[:, :, j::scale_factor[0], i::scale_factor[1]] = data return upscaled def upscale_2d_dilate(data, scale_factor): upscaled = np.zeros(upscale_2d_shape(data.shape, scale_factor)) upscaled[:, :, ::scale_factor[0], ::scale_factor[1]] = data return upscaled def upscale_3d_shape(shape, scale_factor): return (shape[0], shape[1], shape[2] * scale_factor[0], shape[3] * scale_factor[1], shape[4] * scale_factor[2]) def upscale_3d(data, scale_factor): upscaled = np.zeros(upscale_3d_shape(data.shape, scale_factor)) for j in range(scale_factor[0]): for i in range(scale_factor[1]): for k in range(scale_factor[2]): upscaled[:, :, j::scale_factor[0], i::scale_factor[1], k::scale_factor[2]] = data return upscaled def upscale_3d_dilate(data, scale_factor): upscaled = np.zeros(upscale_3d_shape(data.shape, scale_factor)) upscaled[:, :, ::scale_factor[0], ::scale_factor[1], ::scale_factor[2]] = data return upscaled def spatial_pool(data, pool_dims): def ceildiv(a, b): return (a + b - 1) // b def floordiv(a, b): return a // b input_size = data.shape[2:] pooled_data_list = [] for pool_dim in pool_dims: pool_size = tuple(ceildiv(i, pool_dim) for i in input_size) stride_size = tuple(floordiv(i, pool_dim) for i in input_size) pooled_part = max_pool_2d_ignoreborder( data, pool_size, stride_size, (0, 0)) pooled_part = pooled_part.reshape( data.shape[0], data.shape[1], pool_dim ** 2) pooled_data_list.append(pooled_part) return np.concatenate(pooled_data_list, axis=2) def np_pool_fixed_output_size(feature_maps, output_size, pool_op): m, c, h, w = feature_maps.shape result = np.zeros((m, c, output_size, output_size), 
dtype=feature_maps.dtype) n = float(output_size) for i in range(output_size): for j in range(output_size): start_h = int(np.floor((j)/n*h)) end_h = int(np.ceil((j+1)/n*h)) start_w = int(np.floor((i)/n*w)) end_w = int(np.ceil((i+1)/n*w)) region = feature_maps[:, :, start_h:end_h, start_w:end_w] result[:, :, j, i] = pool_op(region, axis=(2, 3)) return result def np_spatial_pool_kaiming(feature_maps, pool_sizes, mode): m, c = feature_maps.shape[0:2] if mode == 'max': op = np.max else: op = np.mean maps = [] for p in pool_sizes: pool_result = np_pool_fixed_output_size(feature_maps, p, op) maps.append(pool_result.reshape((m, c, -1))) return np.concatenate(maps, axis=2) class TestFeaturePoolLayer: def pool_test_sets(): for pool_size in [2, 3]: for axis in [1, 2]: yield (pool_size, axis) def input_layer(self, output_shape): from lasagne.layers.input import InputLayer return InputLayer(output_shape) def layer(self, input_layer, pool_size, axis): from lasagne.layers.pool import FeaturePoolLayer return FeaturePoolLayer( input_layer, pool_size=pool_size, axis=axis, ) def test_init_raises(self): input_layer = self.input_layer((2, 3, 4)) with pytest.raises(ValueError): self.layer(input_layer, pool_size=2, axis=1) @pytest.mark.parametrize( "pool_size, axis", list(pool_test_sets())) def test_layer(self, pool_size, axis): input = floatX(np.random.randn(3, 6, 12, 23)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) layer = self.layer(input_layer, pool_size, axis) layer_result = layer.get_output_for(input_theano).eval() numpy_result = np.swapaxes(input, axis, -1) numpy_result = max_pool_1d(numpy_result, pool_size) numpy_result = np.swapaxes(numpy_result, -1, axis) assert np.all(numpy_result.shape == layer.output_shape) assert np.all(numpy_result.shape == layer_result.shape) assert np.allclose(numpy_result, layer_result) class TestMaxPool1DLayer: def pool_test_sets(): for pool_size in [2, 3]: for stride in [1, 2, 3, 4]: yield (pool_size, stride) def pool_test_sets_ignoreborder(): for pool_size in [2, 3]: for stride in [1, 2, 3, 4]: for pad in range(pool_size): yield (pool_size, stride, pad) def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, pool_size, stride=None, pad=0): from lasagne.layers.pool import MaxPool1DLayer return MaxPool1DLayer( input_layer, pool_size=pool_size, stride=stride, ignore_border=False, ) def layer_ignoreborder(self, input_layer, pool_size, stride=None, pad=0): from lasagne.layers.pool import MaxPool1DLayer return MaxPool1DLayer( input_layer, pool_size=pool_size, stride=stride, pad=pad, ignore_border=True, ) @pytest.mark.parametrize( "pool_size, stride", list(pool_test_sets())) def test_get_output_and_shape_for(self, pool_size, stride): input = floatX(np.random.randn(8, 16, 23)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) layer = self.layer(input_layer, pool_size, stride) layer_output_shape = layer.get_output_shape_for(input.shape) layer_output = layer.get_output_for(input_theano) layer_result = layer_output.eval() numpy_result = max_pool_1d(input, pool_size, stride) assert numpy_result.shape == layer_output_shape assert np.allclose(numpy_result, layer_result) @pytest.mark.parametrize( "pool_size, stride, pad", list(pool_test_sets_ignoreborder())) def test_get_output_for_ignoreborder(self, pool_size, stride, pad): input = floatX(np.random.randn(8, 16, 23)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) layer_output = 
self.layer_ignoreborder( input_layer, pool_size, stride, pad).get_output_for(input_theano) layer_result = layer_output.eval() numpy_result = max_pool_1d_ignoreborder(input, pool_size, stride, pad) assert np.all(numpy_result.shape == layer_result.shape) assert np.allclose(numpy_result, layer_result) @pytest.mark.parametrize( "input_shape", [(32, 64, 128), (None, 64, 128), (32, None, 128), (32, 64, None)]) def test_get_output_shape_for(self, input_shape): input_layer = self.input_layer(input_shape) layer = self.layer_ignoreborder(input_layer, pool_size=2) assert layer.get_output_shape_for((None, 64, 128)) == (None, 64, 64) assert layer.get_output_shape_for((32, 64, None)) == (32, 64, None) assert layer.get_output_shape_for((32, 64, 128)) == (32, 64, 64) def test_fail_on_mismatching_dimensionality(self): from lasagne.layers.pool import MaxPool1DLayer with pytest.raises(ValueError) as exc: MaxPool1DLayer((10, 20), 3, 2) assert "Expected 3 input dimensions" in exc.value.args[0] with pytest.raises(ValueError) as exc: MaxPool1DLayer((10, 20, 30, 40), 3, 2) assert "Expected 3 input dimensions" in exc.value.args[0] class TestMaxPool2DLayer: def pool_test_sets(): for pool_size in [2, 3]: for stride in [1, 2, 3, 4]: yield (pool_size, stride) def pool_test_sets_ignoreborder(): for pool_size in [2, 3]: for stride in [1, 2, 3, 4]: for pad in range(pool_size): yield (pool_size, stride, pad) def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, pool_size, stride=None, pad=(0, 0), ignore_border=False): from lasagne.layers.pool import MaxPool2DLayer return MaxPool2DLayer( input_layer, pool_size=pool_size, stride=stride, pad=pad, ignore_border=ignore_border, ) @pytest.mark.parametrize( "pool_size, stride", list(pool_test_sets())) def test_get_output_for(self, pool_size, stride): try: input = floatX(np.random.randn(8, 16, 17, 13)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) result = self.layer( input_layer, (pool_size, pool_size), (stride, stride), ignore_border=False, ).get_output_for(input_theano) result_eval = result.eval() numpy_result = max_pool_2d( input, (pool_size, pool_size), (stride, stride)) assert np.all(numpy_result.shape == result_eval.shape) assert np.allclose(result_eval, numpy_result) except NotImplementedError: pytest.skip() @pytest.mark.parametrize( "pool_size, stride, pad", list(pool_test_sets_ignoreborder())) def test_get_output_for_ignoreborder(self, pool_size, stride, pad): try: input = floatX(np.random.randn(8, 16, 17, 13)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) result = self.layer( input_layer, pool_size, stride, pad, ignore_border=True, ).get_output_for(input_theano) result_eval = result.eval() numpy_result = max_pool_2d_ignoreborder( input, (pool_size, pool_size), (stride, stride), (pad, pad)) assert np.all(numpy_result.shape == result_eval.shape) assert np.allclose(result_eval, numpy_result) except NotImplementedError: pytest.skip() @pytest.mark.parametrize( "input_shape,output_shape", [((32, 64, 24, 24), (32, 64, 12, 12)), ((None, 64, 24, 24), (None, 64, 12, 12)), ((32, None, 24, 24), (32, None, 12, 12)), ((32, 64, None, 24), (32, 64, None, 12)), ((32, 64, 24, None), (32, 64, 12, None)), ((32, 64, None, None), (32, 64, None, None))], ) def test_get_output_shape_for(self, input_shape, output_shape): try: input_layer = self.input_layer(input_shape) layer = self.layer(input_layer, pool_size=(2, 2), stride=None) assert layer.get_output_shape_for( input_shape) 
== output_shape except NotImplementedError: pytest.skip() def test_fail_on_mismatching_dimensionality(self): from lasagne.layers.pool import MaxPool2DLayer with pytest.raises(ValueError) as exc: MaxPool2DLayer((10, 20, 30), 3, 2) assert "Expected 4 input dimensions" in exc.value.args[0] with pytest.raises(ValueError) as exc: MaxPool2DLayer((10, 20, 30, 40, 50), 3, 2) assert "Expected 4 input dimensions" in exc.value.args[0] class TestMaxPool2DCCLayer: def pool_test_sets(): for pool_size in [2, 3]: for stride in range(1, pool_size+1): yield (pool_size, stride) def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, pool_size, stride): try: from lasagne.layers.cuda_convnet import MaxPool2DCCLayer except ImportError: pytest.skip("cuda_convnet not available") return MaxPool2DCCLayer( input_layer, pool_size=pool_size, stride=stride, ) @pytest.mark.parametrize( "pool_size, stride", list(pool_test_sets())) def test_get_output_for(self, pool_size, stride): try: input = floatX(np.random.randn(8, 16, 16, 16)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) result = self.layer( input_layer, (pool_size, pool_size), (stride, stride), ).get_output_for(input_theano) result_eval = result.eval() numpy_result = max_pool_2d( input, (pool_size, pool_size), (stride, stride)) assert np.all(numpy_result.shape == result_eval.shape) assert np.allclose(result_eval, numpy_result) except NotImplementedError: pytest.skip() @pytest.mark.parametrize( "input_shape,output_shape", [((32, 64, 24, 24), (32, 64, 12, 12)), ((None, 64, 24, 24), (None, 64, 12, 12)), ((32, None, 24, 24), (32, None, 12, 12)), ((32, 64, None, 24), (32, 64, None, 12)), ((32, 64, 24, None), (32, 64, 12, None)), ((32, 64, None, None), (32, 64, None, None))], ) def test_get_output_shape_for(self, input_shape, output_shape): try: input_layer = self.input_layer(input_shape) layer = self.layer(input_layer, pool_size=(2, 2), stride=None) assert layer.get_output_shape_for( input_shape) == output_shape except NotImplementedError: pytest.skip() def test_not_implemented(self): try: from lasagne.layers.cuda_convnet import MaxPool2DCCLayer except ImportError: pytest.skip("cuda_convnet not available") input_layer = self.input_layer((128, 4, 12, 12)) with pytest.raises(NotImplementedError) as exc: layer = MaxPool2DCCLayer(input_layer, pool_size=2, pad=2) assert "MaxPool2DCCLayer does not support padding" in exc.value.args[0] with pytest.raises(NotImplementedError) as exc: layer = MaxPool2DCCLayer(input_layer, pool_size=(2, 3)) assert ("MaxPool2DCCLayer only supports square pooling regions" in exc.value.args[0]) with pytest.raises(NotImplementedError) as exc: layer = MaxPool2DCCLayer(input_layer, pool_size=2, stride=(1, 2)) assert (("MaxPool2DCCLayer only supports using the same stride in " "both directions") in exc.value.args[0]) with pytest.raises(NotImplementedError) as exc: layer = MaxPool2DCCLayer(input_layer, pool_size=2, stride=3) assert ("MaxPool2DCCLayer only supports stride <= pool_size" in exc.value.args[0]) with pytest.raises(NotImplementedError) as exc: layer = MaxPool2DCCLayer(input_layer, pool_size=2, ignore_border=True) assert ("MaxPool2DCCLayer does not support ignore_border=True" in exc.value.args[0]) def test_dimshuffle_false(self): try: from lasagne.layers.cuda_convnet import MaxPool2DCCLayer except ImportError: pytest.skip("cuda_convnet not available") from lasagne.layers.input import InputLayer input_layer = InputLayer((4, 12, 12, 16)) # c01b order layer = 
MaxPool2DCCLayer(input_layer, pool_size=2, dimshuffle=False) assert layer.output_shape == (4, 6, 6, 16) input = floatX(np.random.randn(4, 12, 12, 16)) output = max_pool_2d(input.transpose(3, 0, 1, 2), (2, 2), (2, 2)) output = output.transpose(1, 2, 3, 0) actual = layer.get_output_for(input).eval() assert np.allclose(output, actual) class TestMaxPool2DNNLayer: def pool_test_sets_ignoreborder(): for pool_size in [2, 3]: for stride in [1, 2, 3, 4]: for pad in range(pool_size): yield (pool_size, stride, pad) def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, pool_size, stride, pad): try: from lasagne.layers.dnn import MaxPool2DDNNLayer except ImportError: pytest.skip("cuDNN not available") return MaxPool2DDNNLayer( input_layer, pool_size=pool_size, stride=stride, pad=pad, ) @pytest.mark.parametrize( "pool_size, stride, pad", list(pool_test_sets_ignoreborder())) def test_get_output_for_ignoreborder(self, pool_size, stride, pad): try: input = floatX(np.random.randn(8, 16, 17, 13)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) result = self.layer( input_layer, pool_size, stride, pad, ).get_output_for(input_theano) result_eval = result.eval() numpy_result = max_pool_2d_ignoreborder( input, (pool_size, pool_size), (stride, stride), (pad, pad)) assert np.all(numpy_result.shape == result_eval.shape) assert np.allclose(result_eval, numpy_result) except NotImplementedError: pytest.skip() @pytest.mark.parametrize( "input_shape,output_shape", [((32, 64, 24, 24), (32, 64, 12, 12)), ((None, 64, 24, 24), (None, 64, 12, 12)), ((32, None, 24, 24), (32, None, 12, 12)), ((32, 64, None, 24), (32, 64, None, 12)), ((32, 64, 24, None), (32, 64, 12, None)), ((32, 64, None, None), (32, 64, None, None))], ) def test_get_output_shape_for(self, input_shape, output_shape): try: input_layer = self.input_layer(input_shape) layer = self.layer(input_layer, pool_size=(2, 2), stride=None, pad=(0, 0)) assert layer.get_output_shape_for( input_shape) == output_shape except NotImplementedError: raise # pytest.skip() def test_not_implemented(self): try: from lasagne.layers.dnn import MaxPool2DDNNLayer except ImportError: pytest.skip("cuDNN not available") with pytest.raises(NotImplementedError) as exc: layer = MaxPool2DDNNLayer((1, 2, 3, 4), pool_size=2, ignore_border=False) assert ("Pool2DDNNLayer does not support ignore_border=False" in exc.value.args[0]) def test_fail_on_mismatching_dimensionality(self): try: from lasagne.layers.dnn import MaxPool2DDNNLayer except ImportError: pytest.skip("cuDNN not available") with pytest.raises(ValueError) as exc: MaxPool2DDNNLayer((10, 20, 30), 3, 2) assert "Expected 4 input dimensions" in exc.value.args[0] with pytest.raises(ValueError) as exc: MaxPool2DDNNLayer((10, 20, 30, 40, 50), 3, 2) assert "Expected 4 input dimensions" in exc.value.args[0] class TestPool3DLayer: def pool_test_sets_ignoreborder(): for pool_size in [2, 3]: for stride in [1, 2, 3, 4]: for pad in range(pool_size): yield (pool_size, stride, pad) def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, pool_size, stride, pad): try: from lasagne.layers.pool import Pool3DLayer except ImportError: pytest.skip("theano.signal.pool.pool_3d not available") return Pool3DLayer( input_layer, pool_size=pool_size, stride=stride, pad=pad, ) @pytest.mark.parametrize( "pool_size, stride, pad", list(pool_test_sets_ignoreborder())) def test_get_output_for_ignoreborder(self, pool_size, stride, pad): try: 
input = floatX(np.random.randn(5, 8, 16, 17, 13)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) result = self.layer( input_layer, pool_size, stride, pad, ).get_output_for(input_theano) result_eval = result.eval() numpy_result = max_pool_3d_ignoreborder( input, [pool_size]*3, [stride]*3, [pad]*3) assert np.all(numpy_result.shape == result_eval.shape) assert np.allclose(result_eval, numpy_result) except NotImplementedError: pytest.skip() @pytest.mark.parametrize( "input_shape,output_shape", [((32, 32, 64, 24, 24), (32, 32, 32, 12, 12)), ((None, 32, 48, 24, 24), (None, 32, 24, 12, 12)), ((32, None, 32, 24, 24), (32, None, 16, 12, 12)), ((32, 64, None, 24, 24), (32, 64, None, 12, 12)), ((32, 64, 32, None, 24), (32, 64, 16, None, 12)), ((32, 64, 32, 24, None), (32, 64, 16, 12, None)), ((32, 64, 12, None, None), (32, 64, 6, None, None)), ((32, 64, None, None, None), (32, 64, None, None, None))], ) def test_get_output_shape_for(self, input_shape, output_shape): try: input_layer = self.input_layer(input_shape) layer = self.layer(input_layer, pool_size=(2, 2, 2), stride=None, pad=(0, 0, 0)) assert layer.get_output_shape_for( input_shape) == output_shape except NotImplementedError: raise # pytest.skip() def test_fail_on_mismatching_dimensionality(self): try: from lasagne.layers.pool import Pool3DLayer except ImportError: pytest.skip("theano.signal.pool.pool_3d not available") with pytest.raises(ValueError) as exc: Pool3DLayer((10, 20, 30, 40), 3, 2) assert "Expected 5 input dimensions" in exc.value.args[0] with pytest.raises(ValueError) as exc: Pool3DLayer((10, 20, 30, 40, 50, 60), 3, 2) assert "Expected 5 input dimensions" in exc.value.args[0] class TestMaxPool3DNNLayer: def pool_test_sets_ignoreborder(): for pool_size in [2, 3]: for stride in [1, 2, 3, 4]: for pad in range(pool_size): yield (pool_size, stride, pad) def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, pool_size, stride, pad): try: from lasagne.layers.dnn import MaxPool3DDNNLayer except ImportError: pytest.skip("cuDNN not available") return MaxPool3DDNNLayer( input_layer, pool_size=pool_size, stride=stride, pad=pad, ) @pytest.mark.parametrize( "pool_size, stride, pad", list(pool_test_sets_ignoreborder())) def test_get_output_for_ignoreborder(self, pool_size, stride, pad): try: input = floatX(np.random.randn(5, 8, 16, 17, 13)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) result = self.layer( input_layer, pool_size, stride, pad, ).get_output_for(input_theano) result_eval = result.eval() numpy_result = max_pool_3d_ignoreborder( input, [pool_size]*3, [stride]*3, [pad]*3) assert np.all(numpy_result.shape == result_eval.shape) assert np.allclose(result_eval, numpy_result) except NotImplementedError: pytest.skip() @pytest.mark.parametrize( "input_shape,output_shape", [((32, 32, 64, 24, 24), (32, 32, 32, 12, 12)), ((None, 32, 48, 24, 24), (None, 32, 24, 12, 12)), ((32, None, 32, 24, 24), (32, None, 16, 12, 12)), ((32, 64, None, 24, 24), (32, 64, None, 12, 12)), ((32, 64, 32, None, 24), (32, 64, 16, None, 12)), ((32, 64, 32, 24, None), (32, 64, 16, 12, None)), ((32, 64, 12, None, None), (32, 64, 6, None, None)), ((32, 64, None, None, None), (32, 64, None, None, None))], ) def test_get_output_shape_for(self, input_shape, output_shape): try: input_layer = self.input_layer(input_shape) layer = self.layer(input_layer, pool_size=(2, 2, 2), stride=None, pad=(0, 0, 0)) assert layer.get_output_shape_for( input_shape) == 
output_shape except NotImplementedError: raise # pytest.skip() def test_not_implemented(self): try: from lasagne.layers.dnn import MaxPool3DDNNLayer except ImportError: pytest.skip("cuDNN not available") with pytest.raises(NotImplementedError) as exc: layer = MaxPool3DDNNLayer((1, 2, 3, 4, 5), pool_size=2, ignore_border=False) assert ("Pool3DDNNLayer does not support ignore_border=False" in exc.value.args[0]) def test_fail_on_mismatching_dimensionality(self): try: from lasagne.layers.dnn import MaxPool3DDNNLayer except ImportError: pytest.skip("cuDNN not available") with pytest.raises(ValueError) as exc: MaxPool3DDNNLayer((10, 20, 30, 40), 3, 2) assert "Expected 5 input dimensions" in exc.value.args[0] with pytest.raises(ValueError) as exc: MaxPool3DDNNLayer((10, 20, 30, 40, 50, 60), 3, 2) assert "Expected 5 input dimensions" in exc.value.args[0] class TestUpscale1DLayer: def scale_factor_test_sets(): for scale_factor in [2, 3]: yield scale_factor def mode_test_sets(): for mode in ['repeat', 'dilate']: yield mode def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, scale_factor, mode): from lasagne.layers.pool import Upscale1DLayer return Upscale1DLayer( input_layer, scale_factor=scale_factor, mode=mode, ) def test_invalid_scale_factor(self): from lasagne.layers.pool import Upscale1DLayer inlayer = self.input_layer((128, 3, 32)) with pytest.raises(ValueError): Upscale1DLayer(inlayer, scale_factor=0) with pytest.raises(ValueError): Upscale1DLayer(inlayer, scale_factor=-1) with pytest.raises(ValueError): Upscale1DLayer(inlayer, scale_factor=(0)) def test_invalid_mode(self): from lasagne.layers.pool import Upscale1DLayer inlayer = self.input_layer((128, 3, 32)) with pytest.raises(ValueError): Upscale1DLayer(inlayer, scale_factor=1, mode='') with pytest.raises(ValueError): Upscale1DLayer(inlayer, scale_factor=1, mode='other') with pytest.raises(ValueError): Upscale1DLayer(inlayer, scale_factor=1, mode=0) @pytest.mark.parametrize( "scale_factor", list(scale_factor_test_sets())) @pytest.mark.parametrize( "mode", list(mode_test_sets())) def test_get_output_for(self, scale_factor, mode): input = floatX(np.random.randn(8, 16, 17)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) result = self.layer( input_layer, (scale_factor), mode, ).get_output_for(input_theano) result_eval = result.eval() if mode in {'repeat', None}: numpy_result = upscale_1d(input, (scale_factor, scale_factor)) elif mode == 'dilate': numpy_result = upscale_1d_dilate(input, (scale_factor, scale_factor)) assert np.all(numpy_result.shape == result_eval.shape) assert np.allclose(result_eval, numpy_result) @pytest.mark.parametrize( "input_shape,output_shape", [((32, 64, 24), (32, 64, 48)), ((None, 64, 24), (None, 64, 48)), ((32, None, 24), (32, None, 48)), ((32, 64, None), (32, 64, None))], ) @pytest.mark.parametrize( "mode", list(mode_test_sets())) def test_get_output_shape_for(self, input_shape, output_shape, mode): input_layer = self.input_layer(input_shape) layer = self.layer(input_layer, scale_factor=(2), mode=mode) assert layer.get_output_shape_for( input_shape) == output_shape class TestUpscale2DLayer: def scale_factor_test_sets(): for scale_factor in [2, 3]: yield scale_factor def mode_test_sets(): for mode in ['repeat', 'dilate']: yield mode def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, scale_factor, mode): from lasagne.layers.pool import Upscale2DLayer return Upscale2DLayer( 
input_layer, scale_factor=scale_factor, mode=mode, ) def test_invalid_scale_factor(self): from lasagne.layers.pool import Upscale2DLayer inlayer = self.input_layer((128, 3, 32, 32)) with pytest.raises(ValueError): Upscale2DLayer(inlayer, scale_factor=0) with pytest.raises(ValueError): Upscale2DLayer(inlayer, scale_factor=-1) with pytest.raises(ValueError): Upscale2DLayer(inlayer, scale_factor=(0, 2)) with pytest.raises(ValueError): Upscale2DLayer(inlayer, scale_factor=(2, 0)) def test_invalid_mode(self): from lasagne.layers.pool import Upscale2DLayer inlayer = self.input_layer((128, 3, 32, 32)) with pytest.raises(ValueError): Upscale2DLayer(inlayer, scale_factor=1, mode='') with pytest.raises(ValueError): Upscale2DLayer(inlayer, scale_factor=1, mode='other') with pytest.raises(ValueError): Upscale2DLayer(inlayer, scale_factor=1, mode=0) @pytest.mark.parametrize( "scale_factor", list(scale_factor_test_sets())) @pytest.mark.parametrize( "mode", list(mode_test_sets())) def test_get_output_for(self, scale_factor, mode): input = floatX(np.random.randn(8, 16, 17, 13)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) result = self.layer( input_layer, (scale_factor, scale_factor), mode, ).get_output_for(input_theano) result_eval = result.eval() if mode in {'repeat', None}: numpy_result = upscale_2d(input, (scale_factor, scale_factor)) elif mode == 'dilate': numpy_result = upscale_2d_dilate(input, (scale_factor, scale_factor)) assert np.all(numpy_result.shape == result_eval.shape) assert np.allclose(result_eval, numpy_result) @pytest.mark.parametrize( "input_shape,output_shape", [((32, 64, 24, 24), (32, 64, 48, 48)), ((None, 64, 24, 24), (None, 64, 48, 48)), ((32, None, 24, 24), (32, None, 48, 48)), ((32, 64, None, 24), (32, 64, None, 48)), ((32, 64, 24, None), (32, 64, 48, None)), ((32, 64, None, None), (32, 64, None, None))], ) @pytest.mark.parametrize( "mode", list(mode_test_sets())) def test_get_output_shape_for(self, input_shape, output_shape, mode): input_layer = self.input_layer(input_shape) layer = self.layer(input_layer, scale_factor=(2, 2), mode=mode) assert layer.get_output_shape_for( input_shape) == output_shape class TestUpscale3DLayer: def scale_factor_test_sets(): for scale_factor in [2, 3]: yield scale_factor def mode_test_sets(): for mode in ['repeat', 'dilate']: yield mode def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, scale_factor, mode): from lasagne.layers.pool import Upscale3DLayer return Upscale3DLayer( input_layer, scale_factor=scale_factor, mode=mode, ) def test_invalid_scale_factor(self): from lasagne.layers.pool import Upscale3DLayer inlayer = self.input_layer((128, 3, 32, 32, 32)) with pytest.raises(ValueError): Upscale3DLayer(inlayer, scale_factor=0) with pytest.raises(ValueError): Upscale3DLayer(inlayer, scale_factor=-1) with pytest.raises(ValueError): Upscale3DLayer(inlayer, scale_factor=(0, 2, 0)) with pytest.raises(ValueError): Upscale3DLayer(inlayer, scale_factor=(2, 0, -1)) def test_invalid_mode(self): from lasagne.layers.pool import Upscale3DLayer inlayer = self.input_layer((128, 3, 32, 32, 32)) with pytest.raises(ValueError): Upscale3DLayer(inlayer, scale_factor=1, mode='') with pytest.raises(ValueError): Upscale3DLayer(inlayer, scale_factor=1, mode='other') with pytest.raises(ValueError): Upscale3DLayer(inlayer, scale_factor=1, mode=0) @pytest.mark.parametrize( "scale_factor", list(scale_factor_test_sets())) @pytest.mark.parametrize( "mode", list(mode_test_sets())) def 
test_get_output_for(self, scale_factor, mode): input = floatX(np.random.randn(8, 16, 17, 13, 15)) input_layer = self.input_layer(input.shape) input_theano = theano.shared(input) result = self.layer( input_layer, (scale_factor, scale_factor, scale_factor), mode, ).get_output_for(input_theano) result_eval = result.eval() if mode in {'repeat', None}: numpy_result = upscale_3d(input, (scale_factor, scale_factor, scale_factor)) elif mode == 'dilate': numpy_result = upscale_3d_dilate(input, (scale_factor, scale_factor, scale_factor)) assert np.all(numpy_result.shape == result_eval.shape) assert np.allclose(result_eval, numpy_result) @pytest.mark.parametrize( "input_shape,output_shape", [((32, 64, 24, 24, 24), (32, 64, 48, 48, 48)), ((None, 64, 24, 24, 24), (None, 64, 48, 48, 48)), ((32, None, 24, 24, 24), (32, None, 48, 48, 48)), ((32, 64, None, 24, 24), (32, 64, None, 48, 48)), ((32, 64, 24, None, 24), (32, 64, 48, None, 48)), ((32, 64, None, None, 24), (32, 64, None, None, 48)), ((32, 64, None, None, None), (32, 64, None, None, None)), ((32, 64, 24, 24, None), (32, 64, 48, 48, None))] ) @pytest.mark.parametrize( "mode", list(mode_test_sets())) def test_get_output_shape_for(self, input_shape, output_shape, mode): input_layer = self.input_layer(input_shape) layer = self.layer(input_layer, scale_factor=(2, 2, 2), mode=mode) assert layer.get_output_shape_for( input_shape) == output_shape class TestFeatureWTALayer(object): @pytest.fixture def FeatureWTALayer(self): from lasagne.layers.pool import FeatureWTALayer return FeatureWTALayer @pytest.fixture def input_layer(self): from lasagne.layers.input import InputLayer return InputLayer((2, 4, 8)) @pytest.fixture def layer(self, FeatureWTALayer, input_layer): return FeatureWTALayer(input_layer, pool_size=2) def test_init_raises(self, FeatureWTALayer, input_layer): with pytest.raises(ValueError): FeatureWTALayer(input_layer, pool_size=3) def test_get_output_for(self, layer): input = theano.shared(np.random.uniform(-1, 1, (2, 4, 8))) result = layer.get_output_for(input).eval() reshaped = input.get_value().reshape((2, 2, 2, 8)) np_result = reshaped * (reshaped == reshaped.max(2, keepdims=True)) np_result = np_result.reshape((2, 4, 8)) assert np.allclose(result, np_result) class TestGlobalPoolLayer(object): @pytest.fixture def GlobalPoolLayer(self): from lasagne.layers.pool import GlobalPoolLayer return GlobalPoolLayer @pytest.fixture def layer(self, GlobalPoolLayer): return GlobalPoolLayer(Mock(output_shape=(None,))) def test_get_output_shape_for(self, layer): assert layer.get_output_shape_for((2, 3, 4, 5)) == (2, 3) def test_get_output_for(self, layer): input = theano.shared(np.random.uniform(-1, 1, (2, 3, 4, 5))) result = layer.get_output_for(input).eval() np_result = input.get_value().reshape((2, 3, -1)).mean(-1) assert np.allclose(result, np_result) class TestSpatialPyramidPoolingDNNLayer: def pool_dims_test_sets(): for pyramid_level in [2, 3, 4]: pool_dims = list(range(1, pyramid_level)) yield pool_dims def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, pool_dims): try: from lasagne.layers.dnn import SpatialPyramidPoolingDNNLayer except ImportError: pytest.skip("cuDNN not available") return SpatialPyramidPoolingDNNLayer(input_layer, pool_dims=pool_dims) @pytest.mark.parametrize( "pool_dims", list(pool_dims_test_sets())) @pytest.mark.parametrize( "fixed", [True, False]) def test_get_output_for(self, pool_dims, fixed): try: input = floatX(np.random.randn(8, 16, 17, 13)) if fixed: input_layer = 
self.input_layer(input.shape) else: input_layer = self.input_layer((None, None, None, None)) input_theano = theano.shared(input) layer = self.layer(input_layer, pool_dims) result = layer.get_output_for(input_theano) result_eval = result.eval() numpy_result = spatial_pool(input, pool_dims) assert result_eval.shape == numpy_result.shape assert np.allclose(result_eval, numpy_result) assert result_eval.shape[2] == layer.output_shape[2] except NotImplementedError: pytest.skip() @pytest.mark.parametrize( "input_shape,output_shape", [((32, 64, 24, 24), (32, 64, 21)), ((None, 64, 23, 25), (None, 64, 21)), ((32, None, 22, 26), (32, None, 21)), ((None, None, None, None), (None, None, 21))], ) def test_get_output_shape_for(self, input_shape, output_shape): try: input_layer = self.input_layer(input_shape) layer = self.layer(input_layer, pool_dims=[1, 2, 4]) assert layer.get_output_shape_for(input_shape) == output_shape except NotImplementedError: raise def test_fail_on_mismatching_dimensionality(self): try: from lasagne.layers.dnn import SpatialPyramidPoolingDNNLayer except ImportError: pytest.skip("cuDNN not available") with pytest.raises(ValueError) as exc: SpatialPyramidPoolingDNNLayer((10, 20, 30)) assert "Expected 4 input dimensions" in exc.value.args[0] with pytest.raises(ValueError) as exc: SpatialPyramidPoolingDNNLayer((10, 20, 30, 40, 50)) assert "Expected 4 input dimensions" in exc.value.args[0] class TestSpatialPyramidPoolingLayer: def pool_dims_test_sets(): for pyramid_level in [2, 3, 4]: pool_dims = list(range(1, pyramid_level)) yield pool_dims def input_layer(self, output_shape): return Mock(output_shape=output_shape) def layer(self, input_layer, pool_dims, mode='max', implementation='fast'): from lasagne.layers import SpatialPyramidPoolingLayer if implementation != 'kaiming': try: import theano.tensor as T from lasagne.layers.pool import pool_2d pool_2d(T.tensor4(), ws=T.ivector(), stride=T.ivector(), ignore_border=True, pad=None) except ValueError: pytest.skip('Old theano version') return SpatialPyramidPoolingLayer(input_layer, pool_dims=pool_dims, mode=mode, implementation=implementation) @pytest.mark.parametrize( "pool_dims", list(pool_dims_test_sets())) @pytest.mark.parametrize( "fixed", [True, False]) def test_get_output_for_fast(self, pool_dims, fixed): try: input = floatX(np.random.randn(8, 16, 17, 13)) if fixed: input_layer = self.input_layer(input.shape) else: input_layer = self.input_layer((None, None, None, None)) input_theano = theano.shared(input) layer = self.layer(input_layer, pool_dims) result = layer.get_output_for(input_theano) result_eval = result.eval() numpy_result = spatial_pool(input, pool_dims) assert result_eval.shape == numpy_result.shape assert np.allclose(result_eval, numpy_result) assert result_eval.shape[2] == layer.output_shape[2] except NotImplementedError: pytest.skip() @pytest.mark.parametrize( "pool_dims", list(pool_dims_test_sets())) @pytest.mark.parametrize( "fixed", [True, False]) @pytest.mark.parametrize( "mode", ['max', 'average_exc_pad']) def test_get_output_for_kaiming(self, pool_dims, fixed, mode): try: input = floatX(np.random.randn(8, 16, 17, 13)) if fixed: input_layer = self.input_layer(input.shape) else: input_layer = self.input_layer((None, None, None, None)) input_theano = theano.shared(input) layer = self.layer(input_layer, pool_dims, mode=mode, implementation='kaiming') result = layer.get_output_for(input_theano) result_eval = result.eval() numpy_result = np_spatial_pool_kaiming(input, pool_dims, mode) assert result_eval.shape == 
numpy_result.shape assert np.allclose(result_eval, numpy_result, atol=1e-7) assert result_eval.shape[2] == layer.output_shape[2] except NotImplementedError: pytest.skip() @pytest.mark.parametrize( "input_shape,output_shape", [((32, 64, 24, 24), (32, 64, 21)), ((None, 64, 23, 25), (None, 64, 21)), ((32, None, 22, 26), (32, None, 21)), ((None, None, None, None), (None, None, 21))], ) def test_get_output_shape_for(self, input_shape, output_shape): try: input_layer = self.input_layer(input_shape) layer = self.layer(input_layer, pool_dims=[1, 2, 4]) assert layer.get_output_shape_for(input_shape) == output_shape except NotImplementedError: raise def test_fail_on_mismatching_dimensionality(self): from lasagne.layers import SpatialPyramidPoolingLayer with pytest.raises(ValueError) as exc: SpatialPyramidPoolingLayer((10, 20, 30)) assert "Expected 4 input dimensions" in exc.value.args[0] with pytest.raises(ValueError) as exc: SpatialPyramidPoolingLayer((10, 20, 30, 40, 50)) assert "Expected 4 input dimensions" in exc.value.args[0] def test_fail_invalid_mode(self): with pytest.raises(ValueError) as exc: input = self.input_layer((None, None, None, None)) layer = self.layer(input, pool_dims=[1], mode='other', implementation='kaiming') layer.get_output_for(Mock(shape=(1, 1, 1, 1))) assert "Mode must be either 'max', 'average_inc_pad' or " \ "'average_exc_pad'. Got 'other'" in exc.value.args[0] Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_special.py0000644000175000017500000010320713307306052030166 0ustar sinclairssinclairsfrom mock import Mock import numpy as np import pytest import theano from lasagne.layers import InputLayer, standardize, get_output, get_all_params from lasagne.utils import floatX class TestExpressionLayer: @pytest.fixture def ExpressionLayer(self): from lasagne.layers.special import ExpressionLayer return ExpressionLayer @pytest.fixture def input_layer(self): from lasagne.layers import InputLayer return InputLayer((2, 3, 4, 5)) @pytest.fixture def input_layer_nones(self): from lasagne.layers import InputLayer return InputLayer((1, None, None, 5)) def np_result(self, func, input_layer): X = np.random.uniform(-1, 1, input_layer.output_shape) return X, func(X) @pytest.mark.parametrize('func', [lambda X: X**2, lambda X: X.mean(-1), lambda X: X.sum(), ]) def test_tuple_shape(self, func, input_layer, ExpressionLayer): from lasagne.layers.helper import get_output X, expected = self.np_result(func, input_layer) layer = ExpressionLayer(input_layer, func, output_shape=expected.shape) assert layer.get_output_shape_for(X.shape) == expected.shape output = get_output(layer, X).eval() assert np.allclose(output, expected) @pytest.mark.parametrize('func', [lambda X: X**2, lambda X: X.mean(-1), lambda X: X.sum(), ]) def test_callable_shape(self, func, input_layer, ExpressionLayer): from lasagne.layers.helper import get_output X, expected = self.np_result(func, input_layer) def get_shape(input_shape): return func(np.empty(shape=input_shape)).shape layer = ExpressionLayer(input_layer, func, output_shape=get_shape) assert layer.get_output_shape_for(X.shape) == expected.shape output = get_output(layer, X).eval() assert np.allclose(output, expected) @pytest.mark.parametrize('func', [lambda X: X**2, lambda X: X.mean(-1), lambda X: X.sum(), ]) def test_none_shape(self, func, input_layer, ExpressionLayer): from lasagne.layers.helper import get_output X, expected = self.np_result(func, input_layer) layer = ExpressionLayer(input_layer, func, output_shape=None) if X.shape == 
expected.shape: assert layer.get_output_shape_for(X.shape) == expected.shape output = get_output(layer, X).eval() assert np.allclose(output, expected) @pytest.mark.parametrize('func', [lambda X: X**2, lambda X: X.mean(-1), lambda X: X.sum(), ]) def test_auto_shape(self, func, input_layer, ExpressionLayer): from lasagne.layers.helper import get_output X, expected = self.np_result(func, input_layer) layer = ExpressionLayer(input_layer, func, output_shape='auto') assert layer.get_output_shape_for(X.shape) == expected.shape output = get_output(layer, X).eval() assert np.allclose(output, expected) @pytest.mark.parametrize('func', [lambda X: X**2, lambda X: X.mean(-1), lambda X: X.sum(), ]) def test_nones_shape(self, func, input_layer_nones, ExpressionLayer): input_shape = input_layer_nones.output_shape np_shape = tuple(0 if s is None else s for s in input_shape) X = np.random.uniform(-1, 1, np_shape) expected = func(X) expected_shape = tuple(s if s else None for s in expected.shape) layer = ExpressionLayer(input_layer_nones, func, output_shape=expected_shape) assert layer.get_output_shape_for(input_shape) == expected_shape def get_shape(input_shape): return expected_shape layer = ExpressionLayer(input_layer_nones, func, output_shape=get_shape) assert layer.get_output_shape_for(input_shape) == expected_shape layer = ExpressionLayer(input_layer_nones, func, output_shape='auto') assert layer.get_output_shape_for(input_shape) == expected_shape class TestNonlinearityLayer: @pytest.fixture def NonlinearityLayer(self): from lasagne.layers.special import NonlinearityLayer return NonlinearityLayer @pytest.fixture def layer_vars(self, NonlinearityLayer, dummy_input_layer): nonlinearity = Mock() layer = NonlinearityLayer( dummy_input_layer, nonlinearity=nonlinearity, ) return { 'nonlinearity': nonlinearity, 'layer': layer, } @pytest.fixture def layer(self, layer_vars): return layer_vars['layer'] def test_init_none_nonlinearity(self, NonlinearityLayer, dummy_input_layer): import lasagne.nonlinearities layer = NonlinearityLayer( dummy_input_layer, nonlinearity=None, ) assert layer.nonlinearity == lasagne.nonlinearities.identity def test_get_output_for(self, layer_vars): layer = layer_vars['layer'] nonlinearity = layer_vars['nonlinearity'] input = theano.tensor.matrix() result = layer.get_output_for(input) nonlinearity.assert_called_with(input) assert result is nonlinearity.return_value class TestBiasLayer: @pytest.fixture def BiasLayer(self): from lasagne.layers.special import BiasLayer return BiasLayer @pytest.fixture def init_b(self): # initializer for a tensor of unique values return lambda shape: np.arange(np.prod(shape)).reshape(shape) def test_bias_init(self, BiasLayer, init_b): input_shape = (2, 3, 4) # default: share biases over all but second axis b = BiasLayer(input_shape, b=init_b).b assert np.allclose(b.get_value(), init_b((3,))) # share over first axis only b = BiasLayer(input_shape, b=init_b, shared_axes=0).b assert np.allclose(b.get_value(), init_b((3, 4))) # share over second and third axis b = BiasLayer(input_shape, b=init_b, shared_axes=(1, 2)).b assert np.allclose(b.get_value(), init_b((2,))) # no bias b = BiasLayer(input_shape, b=None).b assert b is None def test_get_output_for(self, BiasLayer, init_b): input_shape = (2, 3, 4) # random input tensor input = np.random.randn(*input_shape).astype(theano.config.floatX) # default: share biases over all but second axis layer = BiasLayer(input_shape, b=init_b) assert np.allclose(layer.get_output_for(input).eval(), input + init_b((1, 3, 1))) # 
share over first axis only layer = BiasLayer(input_shape, b=init_b, shared_axes=0) assert np.allclose(layer.get_output_for(input).eval(), input + init_b((1, 3, 4))) # share over second and third axis layer = BiasLayer(input_shape, b=init_b, shared_axes=(1, 2)) assert np.allclose(layer.get_output_for(input).eval(), input + init_b((2, 1, 1))) # no bias layer = BiasLayer(input_shape, b=None) assert layer.get_output_for(input) is input def test_undefined_shape(self, BiasLayer): # should work: BiasLayer((64, None, 3), shared_axes=(1, 2)) # should not work: with pytest.raises(ValueError) as exc: BiasLayer((64, None, 3), shared_axes=(0, 2)) assert 'needs specified input sizes' in exc.value.args[0] class TestScaleLayer: @pytest.fixture def ScaleLayer(self): from lasagne.layers.special import ScaleLayer return ScaleLayer @pytest.fixture def init_scales(self): # initializer for a tensor of unique values return lambda shape: np.arange(np.prod(shape)).reshape(shape) def test_scales_init(self, ScaleLayer, init_scales): input_shape = (2, 3, 4) # default: share scales over all but second axis b = ScaleLayer(input_shape, scales=init_scales).scales assert np.allclose(b.get_value(), init_scales((3,))) # share over first axis only b = ScaleLayer(input_shape, scales=init_scales, shared_axes=0).scales assert np.allclose(b.get_value(), init_scales((3, 4))) # share over second and third axis b = ScaleLayer( input_shape, scales=init_scales, shared_axes=(1, 2)).scales assert np.allclose(b.get_value(), init_scales((2,))) def test_get_output_for(self, ScaleLayer, init_scales): input_shape = (2, 3, 4) # random input tensor input = np.random.randn(*input_shape).astype(theano.config.floatX) # default: share scales over all but second axis layer = ScaleLayer(input_shape, scales=init_scales) assert np.allclose(layer.get_output_for(input).eval(), input * init_scales((1, 3, 1))) # share over first axis only layer = ScaleLayer(input_shape, scales=init_scales, shared_axes=0) assert np.allclose(layer.get_output_for(input).eval(), input * init_scales((1, 3, 4))) # share over second and third axis layer = ScaleLayer(input_shape, scales=init_scales, shared_axes=(1, 2)) assert np.allclose(layer.get_output_for(input).eval(), input * init_scales((2, 1, 1))) def test_undefined_shape(self, ScaleLayer): # should work: ScaleLayer((64, None, 3), shared_axes=(1, 2)) # should not work: with pytest.raises(ValueError) as exc: ScaleLayer((64, None, 3), shared_axes=(0, 2)) assert 'needs specified input sizes' in exc.value.args[0] def test_standardize(): # Simple example X = np.random.standard_normal((1000, 20)).astype(theano.config.floatX) l_in = InputLayer((None, 20)) l_std = standardize( l_in, X.min(axis=0), (X.max(axis=0) - X.min(axis=0)), shared_axes=0) out = get_output(l_std).eval({l_in.input_var: X}) assert np.allclose(out.max(axis=0), 1.) assert np.allclose(out.min(axis=0), 0.) 
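    # standardize(layer, offset, scale, shared_axes) is expected to compute
    # (x - offset) / scale; with offset = per-feature minimum and
    # scale = per-feature range, every column is rescaled into [0, 1],
    # which the two asserts above verify. The assert below simply counts
    # the parameters the helper registers for the fixed offset and scale.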
assert len(get_all_params(l_std)) == 2 # More complicated example X = np.random.standard_normal( (50, 3, 100, 10)).astype(theano.config.floatX) mean = X.mean(axis=(0, 2)) std = X.std(axis=(0, 2)) l_in = InputLayer((None, 3, None, 10)) l_std = standardize(l_in, mean, std, shared_axes=(0, 2)) out = get_output(l_std).eval({l_in.input_var: X}) assert np.allclose(out.mean(axis=(0, 2)), 0., atol=1e-5) assert np.allclose(out.std((0, 2)), 1., atol=1e-5) class TestInverseLayer: @pytest.fixture def invlayer_vars(self): from lasagne.layers.dense import DenseLayer from lasagne.layers.input import InputLayer from lasagne.layers.special import InverseLayer from lasagne.nonlinearities import identity l_in = InputLayer(shape=(10, 12)) layer = DenseLayer( l_in, num_units=3, b=None, nonlinearity=identity, ) invlayer = InverseLayer( incoming=layer, layer=layer ) return { 'layer': layer, 'invlayer': invlayer, } def test_init(self, invlayer_vars): layer = invlayer_vars['layer'] invlayer = invlayer_vars['invlayer'] # Check that the output shape of the invlayer is the same # as the input shape of the layer assert layer.input_shape == invlayer.output_shape def test_get_output_shape_for(self, invlayer_vars): invlayer = invlayer_vars['invlayer'] assert invlayer.get_output_shape_for( [(34, 55, 89, 144), (5, 8, 13, 21), (1, 1, 2, 3)]) == (1, 1, 2, 3) def test_get_output_for(self, invlayer_vars): from lasagne.layers.helper import get_output invlayer = invlayer_vars['invlayer'] layer = invlayer_vars['layer'] W = layer.W.get_value() input = theano.shared( np.random.rand(*layer.input_shape)) results = get_output(invlayer, inputs=input) # Check that the output of the invlayer is the output of the # dot product of the output of the dense layer and the # transposed weights assert np.allclose( results.eval(), np.dot(np.dot(input.get_value(), W), W.T)) class TestTransformLayer(): def test_transform_affine_errors(self): import lasagne with pytest.raises(ValueError): l_in_a = lasagne.layers.InputLayer((None, 3, 28, 28)) l_loc_a = lasagne.layers.DenseLayer(l_in_a, num_units=5) l_trans = lasagne.layers.TransformerLayer(l_in_a, l_loc_a) with pytest.raises(ValueError): l_in_b = lasagne.layers.InputLayer((3, 28, 28)) l_loc_b = lasagne.layers.DenseLayer(l_in_b, num_units=6) l_trans = lasagne.layers.TransformerLayer(l_in_b, l_loc_b) def test_transform_affine_downsample(self): import lasagne downsample = (0.7, 2.3) x = np.random.random((10, 3, 28, 28)).astype('float32') x_sym = theano.tensor.tensor4() # create transformer with fixed input size l_in = lasagne.layers.InputLayer((None, 3, 28, 28)) l_loc = lasagne.layers.DenseLayer(l_in, num_units=6) l_trans = lasagne.layers.TransformerLayer( l_in, l_loc, downsample_factor=downsample) # check that shape propagation works assert l_trans.output_shape[0] is None assert l_trans.output_shape[1:] == (3, int(28 / .7), int(28 / 2.3)) # check that data propagation works output = lasagne.layers.get_output(l_trans, x_sym) x_out = output.eval({x_sym: x}) assert x_out.shape[0] == x.shape[0] assert x_out.shape[1:] == l_trans.output_shape[1:] # create transformer with variable input size l_in = lasagne.layers.InputLayer((None, 3, None, 28)) l_loc = lasagne.layers.DenseLayer( lasagne.layers.ReshapeLayer(l_in, ([0], 3*28*28)), num_units=6, W=l_loc.W, b=l_loc.b) l_trans = lasagne.layers.TransformerLayer( l_in, l_loc, downsample_factor=downsample) # check that shape propagation works assert l_trans.output_shape[0] is None assert l_trans.output_shape[1] == 3 assert l_trans.output_shape[2] is None assert 
l_trans.output_shape[3] == int(28 / 2.3) # check that data propagation works output = lasagne.layers.get_output(l_trans, x_sym) x_out2 = output.eval({x_sym: x}) assert x_out2.shape == x_out.shape np.testing.assert_allclose(x_out2, x_out, rtol=1e-5, atol=1e-5) def test_transform_affine_identity(self): from lasagne.layers import InputLayer, TransformerLayer from lasagne.utils import floatX from theano.tensor import constant batchsize = 10 l_in = InputLayer((batchsize, 3, 28, 28)) l_loc = InputLayer((batchsize, 6)) layer = TransformerLayer(l_in, l_loc) inputs = floatX(np.arange(np.prod(l_in.shape)).reshape(l_in.shape)) thetas = floatX(np.tile([1, 0, 0, 0, 1, 0], (batchsize, 1))) outputs = layer.get_output_for([constant(inputs), constant(thetas)]).eval() np.testing.assert_allclose(inputs, outputs, rtol=1e-6) def test_transform_border_modes(self): from lasagne.layers import InputLayer, TransformerLayer from lasagne.utils import floatX from theano.tensor import constant l_in = InputLayer((1, 1, 16, 16)) l_loc = InputLayer((1, 6)) # border_mode='nearest' layer = TransformerLayer(l_in, l_loc, border_mode='nearest') image = np.hstack((np.zeros((16, 8)), np.ones((16, 8)))) inputs = floatX(image).reshape(l_in.shape) thetas = floatX(np.array([[4, 0, 0, 0, 1, 0]])) outputs = layer.get_output_for([constant(inputs), constant(thetas)]).eval() np.testing.assert_allclose(inputs, outputs, rtol=1e-6) # border_mode='mirror' layer = TransformerLayer(l_in, l_loc, border_mode='mirror') outputs = layer.get_output_for([constant(inputs), constant(thetas)]).eval() expected = np.zeros_like(outputs) expected[0, 0] = [.5, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, .5] np.testing.assert_allclose(expected, outputs, rtol=1e-6) # border_mode='wrap' layer = TransformerLayer(l_in, l_loc, border_mode='wrap') outputs = layer.get_output_for([constant(inputs), constant(thetas)]).eval() expected = np.zeros_like(outputs) expected[0, 0] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] np.testing.assert_allclose(expected, outputs, rtol=1e-6) with pytest.raises(ValueError): layer = TransformerLayer(l_in, l_loc, border_mode='invalid') outputs = layer.get_output_for([constant(inputs), constant(thetas)]).eval() class TestTPSTransformLayer(): def test_transform_thin_plate_spline_errors(self): import lasagne # Check that number of inputs matches 2*num_control_points with pytest.raises(ValueError): num_control_points = 16 l_in_a = lasagne.layers.InputLayer((None, 3, 28, 28)) l_loc_a = lasagne.layers.DenseLayer(l_in_a, num_units=3*num_control_points) l_trans = lasagne.layers.TPSTransformerLayer( l_in_a, l_loc_a, control_points=num_control_points) # Check that error is raised when precompute_grid is set to True # with unknown input size with pytest.raises(ValueError): l_in = lasagne.layers.InputLayer((None, 3, None, 28)) l_loc = lasagne.layers.DenseLayer( lasagne.layers.ReshapeLayer(l_in, ([0], 3*28*28)), num_units=32) l_trans = lasagne.layers.TPSTransformerLayer(l_in, l_loc, precompute_grid=True) # Check that input is right size with pytest.raises(ValueError): l_in_b = lasagne.layers.InputLayer((3, 28, 28)) l_loc_b = lasagne.layers.DenseLayer(l_in_b, num_units=6) l_trans = lasagne.layers.TPSTransformerLayer(l_in_b, l_loc_b) # Check that number of control points is a perfect square with pytest.raises(ValueError): num_control_points = 17 l_in_a = lasagne.layers.InputLayer((None, 3, 28, 28)) l_loc_a = lasagne.layers.DenseLayer(l_in_a, num_units=2*num_control_points) l_trans = lasagne.layers.TPSTransformerLayer( l_in_a, l_loc_a, 
control_points=num_control_points) # Check that the input shape is correct with pytest.raises(ValueError): num_control_points = 16 l_in_b = lasagne.layers.InputLayer((3, 28, 28)) l_loc_b = lasagne.layers.DenseLayer( l_in_b, num_units=2*num_control_points ) l_trans = lasagne.layers.TPSTransformerLayer(l_in_b, l_loc_b) def test_transform_thin_plate_spline_variable_input(self): import lasagne from lasagne.utils import floatX from theano.tensor import constant x = np.random.random((10, 3, 28, 28)).astype('float32') x_sym = theano.tensor.tensor4() l_in = lasagne.layers.InputLayer((None, 3, None, 28)) l_loc = lasagne.layers.DenseLayer( lasagne.layers.ReshapeLayer(l_in, ([0], 3*28*28)), num_units=32) l_trans = lasagne.layers.TPSTransformerLayer( l_in, l_loc, precompute_grid='auto') # check that shape propagation works assert l_trans.output_shape[0] is None assert l_trans.output_shape[1] == 3 assert l_trans.output_shape[2] is None assert l_trans.output_shape[3] == 28 # check that data propagation works dest_offset = np.zeros(shape=(10, 32)) inputs = floatX(np.arange(np.prod(x.shape)).reshape(x.shape)) outputs = l_trans.get_output_for([constant(inputs), constant(dest_offset)]).eval() np.testing.assert_allclose(inputs, outputs, atol=5e-4) def test_transform_thin_plate_spline_downsample(self): import lasagne downsample = (0.7, 2.3) x = np.random.random((10, 3, 28, 28)).astype('float32') x_sym = theano.tensor.tensor4() # create transformer with fixed input size l_in = lasagne.layers.InputLayer((None, 3, 28, 28)) l_loc = lasagne.layers.DenseLayer(l_in, num_units=32) l_trans = lasagne.layers.TPSTransformerLayer( l_in, l_loc, downsample_factor=downsample, precompute_grid=False ) # check that shape propagation works assert l_trans.output_shape[0] is None assert l_trans.output_shape[1:] == (3, int(28 / .7), int(28 / 2.3)) # check that data propagation works output = lasagne.layers.get_output(l_trans, x_sym) x_out = output.eval({x_sym: x}) assert x_out.shape[0] == x.shape[0] assert x_out.shape[1:] == l_trans.output_shape[1:] # create transformer with variable input size l_in = lasagne.layers.InputLayer((None, 3, None, 28)) l_loc = lasagne.layers.DenseLayer( lasagne.layers.ReshapeLayer(l_in, ([0], 3*28*28)), num_units=32, W=l_loc.W, b=l_loc.b) l_trans = lasagne.layers.TPSTransformerLayer( l_in, l_loc, downsample_factor=downsample, precompute_grid=False ) # check that shape propagation works assert l_trans.output_shape[0] is None assert l_trans.output_shape[1] == 3 assert l_trans.output_shape[2] is None assert l_trans.output_shape[3] == int(28 / 2.3) # check that data propagation works output = lasagne.layers.get_output(l_trans, x_sym) x_out2 = output.eval({x_sym: x}) assert x_out2.shape == x_out.shape np.testing.assert_allclose(x_out2, x_out, rtol=1e-5, atol=1e-5) def test_transform_thin_plate_spline_identity(self): from lasagne.layers import InputLayer, TPSTransformerLayer from lasagne.utils import floatX from theano.tensor import constant batchsize = 5 num_control_points = 16 dest_offset = np.zeros(shape=(batchsize, 2*num_control_points)) l_in = InputLayer((batchsize, 3, 28, 28)) l_loc = InputLayer((batchsize, 2*num_control_points)) layer = TPSTransformerLayer( l_in, l_loc, control_points=num_control_points ) inputs = floatX(np.arange(np.prod(l_in.shape)).reshape(l_in.shape)) outputs = layer.get_output_for([constant(inputs), constant(dest_offset)]).eval() np.testing.assert_allclose(inputs, outputs, atol=5e-4) def test_transform_thin_plate_spline_shift(self): from lasagne.layers import InputLayer, 
TPSTransformerLayer from theano.tensor import constant batchsize = 5 num_control_points = 16 dest_offset = np.ones(shape=(batchsize, 2*num_control_points)) l_in = InputLayer((batchsize, 3, 28, 28)) l_loc = InputLayer((batchsize, 2*num_control_points)) layer = TPSTransformerLayer( l_in, l_loc, control_points=num_control_points ) image = np.zeros(shape=(28, 28)) image[[0, -1], :] = 1 image[:, [0, -1]] = 1 inputs = np.tile(image, (batchsize, 3, 1, 1)) shifted_input = np.ones(shape=(28, 28)) shifted_input[:13, :13] = 0 shifted_input[13, :13] = 0.50000271 shifted_input[:13, 13] = 0.50000271 shifted_input[13, 13] = 0.75000271 shifted_input = np.tile(shifted_input, (batchsize, 3, 1, 1)) outputs = layer.get_output_for([constant(inputs), constant(dest_offset)]).eval() np.testing.assert_allclose(shifted_input, outputs, atol=1e-5) class TestParametricRectifierLayer: @pytest.fixture def ParametricRectifierLayer(self): from lasagne.layers.special import ParametricRectifierLayer return ParametricRectifierLayer @pytest.fixture def init_alpha(self): # initializer for a tensor of unique values return lambda shape: floatX((np.arange( np.prod(shape)).reshape(shape)) / floatX(np.prod(shape))) def test_alpha_init(self, ParametricRectifierLayer, init_alpha): input_shape = (None, 3, 28, 28) # default: alphas only over 2nd axis layer = ParametricRectifierLayer(input_shape, alpha=init_alpha) alpha = layer.alpha assert layer.shared_axes == (0, 2, 3) assert alpha.get_value().shape == (3, ) assert np.allclose(alpha.get_value(), init_alpha((3, ))) # scalar alpha layer = ParametricRectifierLayer(input_shape, alpha=init_alpha, shared_axes='all') alpha = layer.alpha assert layer.shared_axes == (0, 1, 2, 3) assert alpha.get_value().shape == () assert np.allclose(alpha.get_value(), init_alpha((1,))) # alphas shared over the 1st axis layer = ParametricRectifierLayer(input_shape, alpha=init_alpha, shared_axes=0) alpha = layer.alpha assert layer.shared_axes == (0,) assert alpha.get_value().shape == (3, 28, 28) assert np.allclose(alpha.get_value(), init_alpha((3, 28, 28))) # alphas shared over the 1st and 4th axes layer = ParametricRectifierLayer(input_shape, alpha=init_alpha, shared_axes=(0, 3)) alpha = layer.alpha assert layer.shared_axes == (0, 3) assert alpha.get_value().shape == (3, 28) assert np.allclose(alpha.get_value(), init_alpha((3, 28))) def test_undefined_shape(self, ParametricRectifierLayer): with pytest.raises(ValueError): ParametricRectifierLayer((None, 3, 28, 28), shared_axes=(1, 2, 3)) def test_get_output_for(self, ParametricRectifierLayer, init_alpha): input_shape = (3, 3, 28, 28) # random input tensor input = np.random.randn(*input_shape).astype(theano.config.floatX) # default: alphas shared only along 2nd axis layer = ParametricRectifierLayer(input_shape, alpha=init_alpha) alpha_v = layer.alpha.get_value() expected = np.maximum(input, 0) + np.minimum(input, 0) * \ alpha_v[None, :, None, None] assert np.allclose(layer.get_output_for(input).eval(), expected) # scalar alpha layer = ParametricRectifierLayer(input_shape, alpha=init_alpha, shared_axes='all') alpha_v = layer.alpha.get_value() expected = np.maximum(input, 0) + np.minimum(input, 0) * alpha_v assert np.allclose(layer.get_output_for(input).eval(), expected) # alphas shared over the 1st axis layer = ParametricRectifierLayer(input_shape, alpha=init_alpha, shared_axes=0) alpha_v = layer.alpha.get_value() expected = np.maximum(input, 0) + np.minimum(input, 0) * \ alpha_v[None, :, :, :] assert np.allclose(layer.get_output_for(input).eval(), expected, 
atol=1e-07) # alphas shared over the 1st and 4th axes layer = ParametricRectifierLayer(input_shape, shared_axes=(0, 3), alpha=init_alpha) alpha_v = layer.alpha.get_value() expected = np.maximum(input, 0) + np.minimum(input, 0) * \ alpha_v[None, :, :, None] assert np.allclose(layer.get_output_for(input).eval(), expected) def test_prelu(self, init_alpha): import lasagne input_shape = (3, 28) input = np.random.randn(*input_shape).astype(theano.config.floatX) l_in = lasagne.layers.input.InputLayer(input_shape) l_dense = lasagne.layers.dense.DenseLayer(l_in, num_units=100) l_prelu = lasagne.layers.prelu(l_dense, alpha=init_alpha) output = lasagne.layers.get_output(l_prelu, input) assert l_dense.nonlinearity == lasagne.nonlinearities.identity W = l_dense.W.get_value() b = l_dense.b.get_value() alpha_v = l_prelu.alpha.get_value() expected = np.dot(input, W) + b expected = np.maximum(expected, 0) + \ np.minimum(expected, 0) * alpha_v assert np.allclose(output.eval(), expected, atol=1e-07) class TestRandomizedRectifierLayer: @pytest.fixture def RandomizedRectifierLayer(self): from lasagne.layers.special import RandomizedRectifierLayer return RandomizedRectifierLayer def test_high_low(self, RandomizedRectifierLayer): with pytest.raises(ValueError): RandomizedRectifierLayer((None, 3, 28, 28), lower=0.9, upper=0.1) def test_nomod_positive(self, RandomizedRectifierLayer): input = np.ones((3, 3, 28, 28)).astype(theano.config.floatX) layer = RandomizedRectifierLayer(input.shape) out = layer.get_output_for(input).eval() assert np.allclose(out, 1.0) def test_low_eq_high(self, RandomizedRectifierLayer): input = np.ones((3, 3, 28, 28)).astype(theano.config.floatX) * -1 layer = RandomizedRectifierLayer(input.shape, lower=0.5, upper=0.5) out = layer.get_output_for(theano.tensor.constant(input)).eval() assert np.allclose(out, -0.5) def test_deterministic(self, RandomizedRectifierLayer): input = np.ones((3, 3, 28, 28)).astype(theano.config.floatX) * -1 layer = RandomizedRectifierLayer(input.shape, lower=0.4, upper=0.6) out = layer.get_output_for(theano.tensor.constant(input), deterministic=True).eval() assert np.allclose(out, -0.5) def test_dim_None(self, RandomizedRectifierLayer): import lasagne l_in = lasagne.layers.input.InputLayer((None, 3, 28, 28)) layer = RandomizedRectifierLayer(l_in) input = np.ones((3, 3, 28, 28)).astype(theano.config.floatX) out = layer.get_output_for(input).eval() assert np.allclose(out, 1.0) def assert_between(self, layer, input, output): slopes = output / input slopes = slopes[input < 0] assert slopes.min() >= layer.lower assert slopes.max() <= layer.upper assert slopes.var() > 0 def test_get_output_for(self, RandomizedRectifierLayer): input_shape = (3, 3, 28, 28) # ensure slope never exceeds [lower,upper) input = np.random.randn(*input_shape).astype(theano.config.floatX) layer = RandomizedRectifierLayer(input_shape, shared_axes=0) self.assert_between(layer, input, layer.get_output_for(input).eval()) # from here on, we want to check parameter sharing # this is easier to check if the input is all ones input = np.ones(input_shape).astype(theano.config.floatX) * -1 # default: parameters shared along all but 2nd axis layer = RandomizedRectifierLayer(input_shape) out = layer.get_output_for(input).eval() assert [ np.allclose(out.var(axis=a), 0) for a in range(4) ] == [True, False, True, True] # share across all axes (single slope) layer = RandomizedRectifierLayer(input_shape, shared_axes='all') out = layer.get_output_for(input).eval() assert [ np.allclose(out.var(axis=a), 0) for a in 
range(4) ] == [True, True, True, True] # share across 1st axis layer = RandomizedRectifierLayer(input_shape, shared_axes=0) out = layer.get_output_for(input).eval() assert [ np.allclose(out.var(axis=a), 0) for a in range(4) ] == [True, False, False, False] # share across 1st and 4th axes layer = RandomizedRectifierLayer(input_shape, shared_axes=(0, 3)) out = layer.get_output_for(input).eval() assert [ np.allclose(out.var(axis=a), 0) for a in range(4) ] == [True, False, False, True] def test_rrelu(self): import lasagne input_shape = (3, 28) input = np.random.randn(*input_shape).astype(theano.config.floatX) l_in = lasagne.layers.input.InputLayer(input_shape) l_dense = lasagne.layers.dense.DenseLayer(l_in, num_units=100) l_rrelu = lasagne.layers.rrelu(l_dense) output = lasagne.layers.get_output(l_rrelu, input) assert l_dense.nonlinearity == lasagne.nonlinearities.identity W = l_dense.W.get_value() b = l_dense.b.get_value() self.assert_between(l_rrelu, np.dot(input, W) + b, output.eval()) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_conv.py0000644000175000017500000011742713307306052027524 0ustar sinclairssinclairsimport numpy as np import pytest import importlib import theano from theano import tensor as T import lasagne from lasagne.utils import floatX, as_tuple try: from theano import gpuarray theano_backend = "pygpu" except ImportError: from theano.sandbox import gpuarray theano_backend = "pygpu_sandbox" gpu = gpuarray.pygpu_activated if not gpu: try: from theano.sandbox import cuda theano_backend = "cuda_sandbox" gpu = cuda.cuda_enabled except Exception: # Theano 0.10+ raises nose.SkipTest gpu = False if not gpu: theano_backend = "cpu" def convNd(input, kernel, pad, stride=1, groups=1, n=None): """Execute a batch of a stack of N-dimensional convolutions. 
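    This is a plain NumPy reference implementation used by the test suite
    to produce the expected outputs that the Theano-based convolution
    layers are checked against. For example, a (3, 1, 11, 16) input
    convolved with a (16, 1, 3, 3) kernel and ``pad='valid'`` yields a
    (3, 16, 9, 14) output.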
Parameters ---------- input : numpy array kernel : numpy array pad : {0, 'valid', 'same', 'full'}, int or tuple of int stride : int or tuple of int groups: int n : int Returns ------- numpy array """ if groups > 1: input = input.reshape(input.shape[0], groups, -1, *input.shape[2:]) kernel = kernel.reshape(groups, -1, *kernel.shape[1:]) return np.concatenate([convNd(input[:, g], kernel[g], pad, stride, groups=1, n=n) for g in range(groups)], axis=1) if n is None: n = input.ndim - 2 if pad not in ['valid', 'same', 'full']: pad = as_tuple(pad, n, int) input = np.pad(input, [(p, p) for p in (0, 0) + pad], mode='constant') pad = 'valid' output = np.zeros((input.shape[0], kernel.shape[0]) + tuple(i + k - 1 for i, k in zip(input.shape[2:], kernel.shape[2:]))) if n == 1: for i in range(kernel.shape[2]): f = kernel[:, :, i:i+1] c = (input[:, np.newaxis] * f).sum(axis=2) output[:, :, i:i + input.shape[2]] += c elif n == 2: for i in range(kernel.shape[2]): for j in range(kernel.shape[3]): f = kernel[:, :, i:i+1, j:j+1] c = (input[:, np.newaxis] * f).sum(axis=2) output[:, :, i:i + input.shape[2], j:j + input.shape[3]] += c elif n == 3: for i in range(kernel.shape[2]): for j in range(kernel.shape[3]): for k in range(kernel.shape[4]): f = kernel[:, :, i:i+1, j:j+1, k:k+1] c = (input[:, np.newaxis] * f).sum(axis=2) output[:, :, i:i + input.shape[2], j:j + input.shape[3], k:k + input.shape[4]] += c else: raise NotImplementedError("convNd() only supports n in (1, 2, 3)") if pad == 'valid': trim = tuple(k - 1 for k in kernel.shape[2:]) slices = [slice(None), slice(None)] slices += [slice(t, -t or None) for t in trim] output = output[slices] elif pad == 'same': shift = tuple((k - 1) // 2 for k in kernel.shape[2:]) slices = [slice(None), slice(None)] slices += [slice(s, s + i) for s, i in zip(shift, input.shape[2:])] output = output[slices] stride = as_tuple(stride, n, int) if any(s > 1 for s in stride): slices = [slice(None), slice(None)] slices += [slice(None, None, s) for s in stride] output = output[slices] return output def dilate(input, factors): """Inserts `factors[i] - 1` zeros between input elements on axis i.""" output = np.zeros(tuple((s-1)*f + 1 for s, f in zip(input.shape, factors)), dtype=input.dtype) output[[slice(None, None, factor) for factor in factors]] = input return output def transposed_convNd(input, kernel, crop, stride=1, n=None, extend=None): if n is None: n = input.ndim - 2 if crop == 'valid': pad = 'full' elif crop == 'full': pad = 'valid' elif crop == 'same': pad = 'same' else: crop = as_tuple(crop, n, int) pad = tuple(f - 1 - c for f, c in zip(kernel.shape[2:], crop)) stride = as_tuple(stride, n, int) dilated_input = dilate(input, (1, 1) + stride) if extend is not None: extend = as_tuple(extend, n, int) extend = [(0, p) for p in (0, 0) + extend] dilated_input = np.pad(dilated_input, extend, mode='constant') return convNd(dilated_input, kernel, pad, stride=1, n=n) def dilated_convNd(input, kernel, pad, dilation=1, n=None): if n is None: n = input.ndim - 2 dilation = as_tuple(dilation, n, int) dilated_kernel = dilate(kernel, (1, 1) + dilation) return convNd(input, dilated_kernel, pad, stride=1, n=n) def convNd_test_sets(n): def _convert(input, kernel, output, kwargs): return [theano.shared(floatX(input)), floatX(kernel), output, kwargs] extra_shape = (11, 16, 23) input_shape = (3, 1) + extra_shape[-n:] for pad in (0, 1, 2, 'full', 'same'): for stride in (1, 2, 3): for filter_size in (1, 3): if stride > filter_size: continue input = np.random.random(input_shape) kernel = 
np.random.random((16, 1) + (filter_size,) * n) output = convNd(input, kernel, pad, stride, n=n) yield _convert(input, kernel, output, {'pad': pad, 'stride': stride, 'flip_filters': True, }) # bias-less case input = np.random.random(input_shape) kernel = np.random.random((16, 1) + (3,) * n) output = convNd(input, kernel, pad='valid') yield _convert(input, kernel, output, {'b': None, 'flip_filters': True}) # untie_biases=True case yield _convert(input, kernel, output, {'untie_biases': True, 'flip_filters': True}) # pad='valid' case yield _convert(input, kernel, output, {'pad': 'valid', 'flip_filters': True}) # flip_filters=False case flip = (slice(None), slice(None)) + (slice(None, None, -1),) * n output = convNd(input, kernel[flip], pad='valid') yield _convert(input, kernel, output, {'flip_filters': False}) # num_groups=3 case input_shape = (2, 6) + extra_shape[-n:] input = np.random.random(input_shape) kernel = np.random.random((9, 2) + (3,) * n) output = convNd(input, kernel, pad='valid', groups=3) yield _convert(input, kernel, output, {'num_groups': 3, 'flip_filters': True}) def conv3d_test_sets(): return convNd_test_sets(3) def conv2d_test_sets(): return convNd_test_sets(2) def conv1d_test_sets(): return convNd_test_sets(1) def transp_conv2d_test_sets(): def _convert(input, kernel, output, kwargs): return [floatX(input), floatX(kernel), output, kwargs] input_shape = (3, 1, 11, 16) for crop in (0, 1, 2, 'full', 'same'): for stride in (1, 2, 3): for filter_size in (1, 3): if stride > filter_size: continue if crop not in ('full', 'same') and crop > (filter_size - 1): continue input = np.random.random(input_shape) kernel = np.random.random((16, 1, filter_size, filter_size)) output = transposed_convNd(input, kernel, crop, stride, 2) yield _convert(input, kernel, output, {'crop': crop, 'stride': stride, 'flip_filters': True}) # bias-less case input = np.random.random(input_shape) kernel = np.random.random((16, 1, 3, 3)) output = transposed_convNd(input, kernel, 'valid') yield _convert(input, kernel, output, {'b': None, 'flip_filters': True}) # untie_biases=True case yield _convert(input, kernel, output, {'untie_biases': True, 'flip_filters': True}) # crop='valid' case yield _convert(input, kernel, output, {'crop': 'valid', 'flip_filters': True}) # flip_filters=False case output = transposed_convNd(input, kernel[:, :, ::-1, ::-1], 'valid') yield _convert(input, kernel, output, {'flip_filters': False}) # extend (w/ and w/out symbolic output shape) for symbolic in [False, True]: input_shape = (4, 3, 7, 9) input = np.random.random(input_shape) kernel = np.random.random((16, 3, 2, 3)) stride = (2, 3) for extend in [(0, 1), (1, 2)]: output = transposed_convNd(input, kernel, 0, stride, extend=extend) kwargs = {'stride': stride, 'flip_filters': True} if symbolic: kwargs['output_size'] = theano.shared( np.array(output.shape[2:])) else: kwargs['output_size'] = output.shape[2:] yield _convert(input, kernel, output, kwargs) def transp_conv3d_test_sets(): def _convert(input, kernel, output, kwargs): return [floatX(input), floatX(kernel), output, kwargs] input_shape = (3, 1, 9, 11, 16) for crop in (0, 1, 2, 'full', 'same'): for stride in (1, 2, 3): for filter_size in (1, 3): if stride > filter_size: continue if crop not in ('full', 'same') and crop > (filter_size - 1): continue input = np.random.random(input_shape) kernel = np.random.random((16, 1, filter_size, filter_size, filter_size)) output = transposed_convNd(input, kernel, crop, stride, 3) yield _convert(input, kernel, output, {'crop': crop, 
'stride': stride, 'flip_filters': True}) # bias-less case input = np.random.random(input_shape) kernel = np.random.random((16, 1, 3, 3, 3)) output = transposed_convNd(input, kernel, 'valid') yield _convert(input, kernel, output, {'b': None, 'flip_filters': True}) # untie_biases=True case yield _convert(input, kernel, output, {'untie_biases': True, 'flip_filters': True}) # crop='valid' case yield _convert(input, kernel, output, {'crop': 'valid', 'flip_filters': True}) # flip_filters=False case output = transposed_convNd(input, kernel[:, :, ::-1, ::-1, ::-1], 'valid') yield _convert(input, kernel, output, {'flip_filters': False}) # extend (w/ and w/out symbolic output shape) for symbolic in [False, True]: input_shape = (4, 3, 7, 9, 11) input = np.random.random(input_shape) kernel = np.random.random((16, 3, 2, 3, 5)) stride = (2, 3, 5) for extend in [(0, 1, 3), (1, 2, 4)]: output = transposed_convNd(input, kernel, 0, stride, extend=extend) kwargs = {'stride': stride, 'flip_filters': True} if symbolic: kwargs['output_size'] = theano.shared( np.array(output.shape[2:])) else: kwargs['output_size'] = output.shape[2:] yield _convert(input, kernel, output, kwargs) def dilated_conv2d_test_sets(): def _convert(input, kernel, output, kwargs): return [floatX(input), floatX(kernel), output, kwargs] input_shape = (3, 1, 11, 16) for dilation in (1, 2, 3): for filter_size in (1, 3): input = np.random.random(input_shape) kernel = np.random.random((16, 1, filter_size, filter_size)) kernel_flip = kernel[:, :, ::-1, ::-1] output = dilated_convNd(input, kernel_flip, 'valid', dilation, 2) yield _convert(input, kernel, output, {'dilation': dilation}) # bias-less case input = np.random.random(input_shape) kernel = np.random.random((16, 1, 3, 3)) output = dilated_convNd(input, kernel[:, :, ::-1, ::-1], pad='valid') yield _convert(input, kernel, output, {'b': None}) # untie_biases=True case yield _convert(input, kernel, output, {'untie_biases': True}) def test_conv_output_length(): from lasagne.layers.conv import conv_output_length assert conv_output_length(13, 5, 3, 'valid') == 3 assert conv_output_length(13, 5, 3, 0) == 3 assert conv_output_length(13, 5, 3, 'full') == 6 assert conv_output_length(13, 5, 3, 'same') == 5 assert conv_output_length(13, 5, 3, 2) == 5 with pytest.raises(ValueError) as exc: conv_output_length(13, 5, 3, '_nonexistent_mode') assert "Invalid pad: " in exc.value.args[0] def test_conv_input_length(): from lasagne.layers.conv import conv_input_length # using the examples from https://github.com/vdumoulin/conv_arithmetic # no padding, no strides assert conv_input_length(2, 3, 1, 'valid') == 4 assert conv_input_length(2, 3, 1, 0) == 4 # padding, no strides assert conv_input_length(6, 4, 1, 2) == 5 # no padding, strides assert conv_input_length(2, 3, 2, 0) == 5 # padding, strides assert conv_input_length(3, 3, 2, 'same') == 5 # full convolution assert conv_input_length(3, 3, 2, 'full') == 3 with pytest.raises(ValueError) as exc: conv_input_length(3, 5, 3, '_nonexistent_mode') assert "Invalid pad: " in exc.value.args[0] @pytest.fixture def DummyInputLayer(): def factory(shape): from lasagne.layers.input import InputLayer return InputLayer(shape) return factory class TestBaseConvLayer: def test_infer_dimensionality(self): from lasagne.layers.conv import BaseConvLayer shape = (10, 20, 30, 40, 50, 60) for n in range(1, 4): layer = BaseConvLayer(shape[:n+2], 1, 3) assert layer.n == n def test_convolve_not_implemented(self): from lasagne.layers.conv import BaseConvLayer layer = BaseConvLayer((10, 20, 
30), 1, 3) with pytest.raises(NotImplementedError): layer.convolve(theano.tensor.tensor3()) def test_fail_on_mismatching_dimensionality(self): from lasagne.layers.conv import BaseConvLayer with pytest.raises(ValueError) as exc: BaseConvLayer((10, 20, 30), 1, 3, n=2) assert "Expected 4 input dimensions" in exc.value.args[0] with pytest.raises(ValueError) as exc: BaseConvLayer((10, 20, 30, 40), 1, 3, n=1) assert "Expected 3 input dimensions" in exc.value.args[0] def test_fail_on_mismatching_groups(self): from lasagne.layers.conv import BaseConvLayer with pytest.raises(ValueError) as exc: BaseConvLayer((2, 3, 4), 1, 3, num_groups=2) assert "evenly divide" in exc.value.args[0] with pytest.raises(ValueError) as exc: BaseConvLayer((2, 3, 4), 1, 3, num_groups=-3) assert "must be positive" in exc.value.args[0] def test_integer_types(self): from lasagne.layers.conv import BaseConvLayer BaseConvLayer((2, 3, 4), np.int64(1), np.int64(3)) BaseConvLayer((2, 3, 4, 5), 1, np.empty((3, 3)).shape) class TestConv1DLayer: @pytest.mark.parametrize( "input, kernel, output, kwargs", list(conv1d_test_sets())) def test_defaults(self, DummyInputLayer, input, kernel, output, kwargs): b, c, w = input.shape.eval() input_layer = DummyInputLayer((b, c, w)) try: from lasagne.layers.conv import Conv1DLayer layer = Conv1DLayer( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2], W=kernel, **kwargs ) actual = layer.get_output_for(input).eval() assert actual.shape == output.shape assert actual.shape == layer.output_shape assert np.allclose(actual, output) except (NotImplementedError, RuntimeError): pass def test_init_none_nonlinearity_bias(self, DummyInputLayer): from lasagne.layers.conv import Conv1DLayer input_layer = DummyInputLayer((1, 2, 3)) layer = Conv1DLayer(input_layer, num_filters=16, filter_size=(3,), nonlinearity=None, b=None) assert layer.nonlinearity == lasagne.nonlinearities.identity assert layer.b is None def test_invalid_pad(self, DummyInputLayer): from lasagne.layers.conv import Conv1DLayer input_layer = DummyInputLayer((1, 2, 3)) with pytest.raises(TypeError) as exc: layer = Conv1DLayer(input_layer, num_filters=16, filter_size=(3,), pad='_nonexistent_mode') assert "iterable of int" in exc.value.args[0] with pytest.raises(NotImplementedError) as exc: layer = Conv1DLayer(input_layer, num_filters=16, filter_size=(4,), pad='same') assert "requires odd filter size" in exc.value.args[0] class TestConv2DLayerImplementations: @pytest.fixture( params=[ ('lasagne.layers', 'Conv2DLayer'), ('lasagne.layers.cuda_convnet', 'Conv2DCCLayer'), ('lasagne.layers.corrmm', 'Conv2DMMLayer'), ('lasagne.layers.dnn', 'Conv2DDNNLayer'), ], ) def Conv2DImpl(self, request): impl_module_name, impl_name = request.param try: mod = importlib.import_module(impl_module_name) except ImportError: pytest.skip("{} not available".format(impl_module_name)) return getattr(mod, impl_name) @pytest.mark.parametrize( "input, kernel, output, kwargs", list(conv2d_test_sets())) def test_defaults(self, Conv2DImpl, DummyInputLayer, input, kernel, output, kwargs): b, c, h, w = input.shape.eval() input_layer = DummyInputLayer((b, c, h, w)) try: layer = Conv2DImpl( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2:], W=kernel, **kwargs ) actual = layer.get_output_for(input).eval() assert actual.shape == output.shape assert actual.shape == layer.output_shape assert np.allclose(actual, output) except (NotImplementedError, RuntimeError): pytest.skip() @pytest.mark.parametrize( "input, kernel, output, kwargs", 
list(conv2d_test_sets())) def test_with_nones(self, Conv2DImpl, DummyInputLayer, input, kernel, output, kwargs): if kwargs.get('untie_biases', False): pytest.skip() b, c, h, w = input.shape.eval() input_layer = DummyInputLayer((None, c, None, None)) try: layer = Conv2DImpl( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2:], W=kernel, **kwargs ) actual = layer.get_output_for(input).eval() assert layer.output_shape == (None, kernel.shape[0], None, None) assert actual.shape == output.shape assert np.allclose(actual, output) except (NotImplementedError, RuntimeError): pytest.skip() def test_init_none_nonlinearity_bias(self, Conv2DImpl, DummyInputLayer): input_layer = DummyInputLayer((1, 2, 3, 3)) layer = Conv2DImpl(input_layer, num_filters=16, filter_size=(3, 3), nonlinearity=None, b=None) assert layer.nonlinearity == lasagne.nonlinearities.identity assert layer.b is None def test_invalid_pad(self, Conv2DImpl, DummyInputLayer): input_layer = DummyInputLayer((1, 2, 3, 3)) with pytest.raises(TypeError) as exc: layer = Conv2DImpl(input_layer, num_filters=16, filter_size=(3, 3), pad='_nonexistent_mode') assert "iterable of int" in exc.value.args[0] with pytest.raises(NotImplementedError) as exc: layer = Conv2DImpl(input_layer, num_filters=16, filter_size=(4, 4), pad='same') assert "requires odd filter size" in exc.value.args[0] def test_get_params(self, Conv2DImpl, DummyInputLayer): input_layer = DummyInputLayer((128, 3, 32, 32)) layer = Conv2DImpl(input_layer, num_filters=16, filter_size=(3, 3)) assert layer.get_params() == [layer.W, layer.b] assert layer.get_params(regularizable=False) == [layer.b] assert layer.get_params(regularizable=True) == [layer.W] assert layer.get_params(trainable=True) == [layer.W, layer.b] assert layer.get_params(trainable=False) == [] assert layer.get_params(_nonexistent_tag=True) == [] assert layer.get_params(_nonexistent_tag=False) == [layer.W, layer.b] class TestConv3DLayerImplementations: @pytest.fixture( params=[ ('lasagne.layers', 'Conv3DLayer'), ('lasagne.layers.dnn', 'Conv3DDNNLayer'), ], ) def Conv3DImpl(self, request): impl_module_name, impl_name = request.param try: mod = importlib.import_module(impl_module_name) return getattr(mod, impl_name) except (ImportError, AttributeError): pytest.skip("{} not available".format(impl_module_name)) @pytest.mark.parametrize( "input, kernel, output, kwargs", list(conv3d_test_sets())) def test_defaults(self, Conv3DImpl, DummyInputLayer, input, kernel, output, kwargs): b, c, d, h, w = input.shape.eval() input_layer = DummyInputLayer((b, c, d, h, w)) try: layer = Conv3DImpl( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2:], W=kernel, **kwargs ) actual = layer.get_output_for(input).eval() assert actual.shape == output.shape assert actual.shape == layer.output_shape assert np.allclose(actual, output) except (NotImplementedError, RuntimeError): pytest.skip() @pytest.mark.parametrize( "input, kernel, output, kwargs", list(conv3d_test_sets())) def test_with_nones(self, Conv3DImpl, DummyInputLayer, input, kernel, output, kwargs): if kwargs.get('untie_biases', False): pytest.skip() b, c, d, h, w = input.shape.eval() input_layer = DummyInputLayer((None, c, None, None, None)) try: layer = Conv3DImpl( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2:], W=kernel, **kwargs ) actual = layer.get_output_for(input).eval() assert layer.output_shape == (None, kernel.shape[0], None, None, None) assert actual.shape == output.shape assert np.allclose(actual, output) except 
(NotImplementedError, RuntimeError): pytest.skip() def test_init_none_nonlinearity_bias(self, Conv3DImpl, DummyInputLayer): input_layer = DummyInputLayer((1, 2, 3, 3, 3)) layer = Conv3DImpl(input_layer, num_filters=16, filter_size=(3, 3, 3), nonlinearity=None, b=None) assert layer.nonlinearity == lasagne.nonlinearities.identity assert layer.b is None def test_invalid_pad(self, Conv3DImpl, DummyInputLayer): input_layer = DummyInputLayer((1, 2, 3, 3, 3)) with pytest.raises(TypeError) as exc: layer = Conv3DImpl(input_layer, num_filters=16, filter_size=(3, 3, 3), pad='_nonexistent_mode') assert "iterable of int" in exc.value.args[0] with pytest.raises(NotImplementedError) as exc: layer = Conv3DImpl(input_layer, num_filters=16, filter_size=(4, 4, 4), pad='same') assert "requires odd filter size" in exc.value.args[0] def test_get_params(self, Conv3DImpl, DummyInputLayer): input_layer = DummyInputLayer((128, 3, 32, 32, 32)) layer = Conv3DImpl(input_layer, num_filters=16, filter_size=(3, 3, 3)) assert layer.get_params() == [layer.W, layer.b] assert layer.get_params(regularizable=False) == [layer.b] assert layer.get_params(regularizable=True) == [layer.W] assert layer.get_params(trainable=True) == [layer.W, layer.b] assert layer.get_params(trainable=False) == [] assert layer.get_params(_nonexistent_tag=True) == [] assert layer.get_params(_nonexistent_tag=False) == [layer.W, layer.b] class TestTransposedConv2DLayer: @pytest.mark.parametrize( "input, kernel, output, kwargs", list(transp_conv2d_test_sets())) def test_defaults(self, DummyInputLayer, input, kernel, output, kwargs): from lasagne.layers import TransposedConv2DLayer b, c, h, w = input.shape input_layer = DummyInputLayer((b, c, h, w)) layer = TransposedConv2DLayer( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2:], W=kernel.transpose(1, 0, 2, 3), **kwargs) actual = layer.get_output_for(input).eval() assert actual.shape == output.shape # layer.output_shape == actual.shape or None assert all([s1 == s2 for (s1, s2) in zip(actual.shape, output.shape) if s2]) assert np.allclose(actual, output) # Check get_output_shape_for for symbolic output if 'output_size' in kwargs and isinstance(kwargs['output_size'], T.Variable): assert all(el is None for el in layer.get_output_shape_for(input.shape)[2:]) @pytest.mark.parametrize( "input, kernel, output, kwargs", list(transp_conv2d_test_sets())) def test_with_nones(self, DummyInputLayer, input, kernel, output, kwargs): if kwargs.get('untie_biases', False): pytest.skip() from lasagne.layers import TransposedConv2DLayer b, c, h, w = input.shape input_layer = DummyInputLayer((None, c, None, None)) layer = TransposedConv2DLayer( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2:], W=kernel.transpose(1, 0, 2, 3), **kwargs) if 'output_size' not in kwargs or isinstance(kwargs['output_size'], T.Variable): assert layer.output_shape == (None, output.shape[1], None, None) actual = layer.get_output_for(input).eval() assert actual.shape == output.shape assert np.allclose(actual, output) # Check get_output_shape_for for non symbolic output if 'output_size' in kwargs and not isinstance(kwargs['output_size'], T.Variable): assert layer.get_output_shape_for(input.shape) == output.shape # The layer should report the output size even when it # doesn't know most of the input size assert layer.output_shape == ( None, output.shape[1]) + kwargs['output_size'] class TestTransposedConv3DLayer: @pytest.fixture( params=[ ('lasagne.layers', 'TransposedConv3DLayer') ], ) def 
TransposedConv3DLayerImpl(self, request): impl_module_name, impl_name = request.param try: mod = importlib.import_module(impl_module_name) return getattr(mod, impl_name) except (ImportError, AttributeError): pytest.skip("{} not available".format(impl_module_name)) @pytest.mark.parametrize( "input, kernel, output, kwargs", list(transp_conv3d_test_sets())) def test_defaults(self, TransposedConv3DLayerImpl, DummyInputLayer, input, kernel, output, kwargs): b, c, d, h, w = input.shape input_layer = DummyInputLayer((b, c, d, h, w)) try: layer = TransposedConv3DLayerImpl( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2:], W=kernel.transpose(1, 0, 2, 3, 4), **kwargs) actual = layer.get_output_for(input).eval() assert actual.shape == output.shape # layer.output_shape == actual.shape or None assert all([s1 == s2 for (s1, s2) in zip(actual.shape, output.shape) if s2]) assert np.allclose(actual, output) # Check get_output_shape_for for symbolic output if 'output_size' in kwargs and isinstance(kwargs['output_size'], T.Variable): assert all(el is None for el in layer.get_output_shape_for(input.shape)[2:]) except NotImplementedError: pytest.skip() @pytest.mark.parametrize( "input, kernel, output, kwargs", list(transp_conv3d_test_sets())) def test_with_nones(self, TransposedConv3DLayerImpl, DummyInputLayer, input, kernel, output, kwargs): if kwargs.get('untie_biases', False): pytest.skip() b, c, d, h, w = input.shape input_layer = DummyInputLayer((None, c, None, None, None)) try: layer = TransposedConv3DLayerImpl( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2:], W=kernel.transpose(1, 0, 2, 3, 4), **kwargs) if 'output_size' not in kwargs or \ isinstance(kwargs['output_size'], T.Variable): assert layer.output_shape == (None, output.shape[1], None, None, None) actual = layer.get_output_for(input).eval() assert actual.shape == output.shape assert np.allclose(actual, output) # Check get_output_shape_for for non symbolic output if 'output_size' in kwargs and not \ isinstance(kwargs['output_size'], T.Variable): assert layer.get_output_shape_for(input.shape) == output.shape # The layer should report the output size even when it # doesn't know most of the input size assert layer.output_shape == ( None, output.shape[1]) + kwargs['output_size'] except NotImplementedError: pytest.skip() class TestDilatedConv2DLayer: @pytest.mark.parametrize( "input, kernel, output, kwargs", list(dilated_conv2d_test_sets())) def test_defaults(self, DummyInputLayer, input, kernel, output, kwargs): from lasagne.layers import DilatedConv2DLayer b, c, h, w = input.shape input_layer = DummyInputLayer((b, c, h, w)) layer = DilatedConv2DLayer( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2:], W=kernel.transpose(1, 0, 2, 3), **kwargs) actual = layer.get_output_for(theano.shared(input)).eval() assert actual.shape == output.shape assert actual.shape == layer.output_shape assert np.allclose(actual, output) @pytest.mark.parametrize( "input, kernel, output, kwargs", list(dilated_conv2d_test_sets())) def test_with_nones(self, DummyInputLayer, input, kernel, output, kwargs): if kwargs.get('untie_biases', False): pytest.skip() from lasagne.layers import DilatedConv2DLayer b, c, h, w = input.shape input_layer = DummyInputLayer((None, c, None, None)) layer = DilatedConv2DLayer( input_layer, num_filters=kernel.shape[0], filter_size=kernel.shape[2:], W=kernel.transpose(1, 0, 2, 3), **kwargs) assert layer.output_shape == (None, output.shape[1], None, None) actual = 
layer.get_output_for(input).eval() assert actual.shape == output.shape assert np.allclose(actual, output) def test_unsupported_settings(self, DummyInputLayer): from lasagne.layers import DilatedConv2DLayer input_layer = DummyInputLayer((10, 20, 30, 40)) for pad in 'same', 'full', 1: with pytest.raises(NotImplementedError) as exc: DilatedConv2DLayer(input_layer, 2, 3, pad=pad) assert "requires pad=0" in exc.value.args[0] with pytest.raises(NotImplementedError) as exc: DilatedConv2DLayer(input_layer, 2, 3, flip_filters=True) assert "requires flip_filters=False" in exc.value.args[0] class TestConv2DDNNLayer: def test_import_without_gpu_or_cudnn_raises(self): if theano_backend == 'pygpu': from theano.gpuarray import dnn if dnn.dnn_present(): pytest.skip() elif theano_backend == 'pygpu_sandbox': from theano.sandbox.gpuarray import dnn if dnn.dnn_present(): pytest.skip() elif theano_backend == 'cuda_sandbox': from theano.sandbox.cuda import dnn if dnn.dnn_available(): pytest.skip() else: with pytest.raises(ImportError): import lasagne.layers.dnn class TestConv2DMMLayer: def test_import_without_gpu_raises(self): if theano_backend in ['pygpu', 'pygpu_sandbox', 'cuda_sandbox']: pytest.skip() else: with pytest.raises(ImportError): import lasagne.layers.corrmm class TestConv2DCCLayer: def test_import_without_gpu_raises(self): if theano_backend in ['pygpu', 'pygpu_sandbox', 'cuda_sandbox']: pytest.skip() else: with pytest.raises(ImportError): import lasagne.layers.cuda_convnet def test_unsupported_settings(self, DummyInputLayer): try: from lasagne.layers.cuda_convnet import Conv2DCCLayer except ImportError: pytest.skip("cuda_convnet not available") input_layer = DummyInputLayer((128, 3, 32, 32)) with pytest.raises(RuntimeError) as exc: layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 5)) assert ("Conv2DCCLayer only supports square filters" in exc.value.args[0]) with pytest.raises(RuntimeError) as exc: layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3), stride=(1, 2)) assert ("Conv2DCCLayer only supports square strides" in exc.value.args[0]) with pytest.raises(RuntimeError) as exc: layer = Conv2DCCLayer(input_layer, num_filters=15, filter_size=(3, 3)) assert ("Conv2DCCLayer requires num_filters to be a multiple of 16" in exc.value.args[0]) with pytest.raises(RuntimeError) as exc: layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3), pad=(1, 2)) assert ("Conv2DCCLayer only supports square padding" in exc.value.args[0]) input_layer = DummyInputLayer((128, 7, 32, 32)) with pytest.raises(RuntimeError) as exc: layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3)) assert ("Conv2DCCLayer requires the number of input channels to be " "1, 2, 3 or a multiple of 4" in exc.value.args[0]) def test_pad(self, DummyInputLayer): try: from lasagne.layers.cuda_convnet import Conv2DCCLayer except ImportError: pytest.skip("cuda_convnet not available") input_layer = DummyInputLayer((128, 3, 32, 32)) layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3), pad=(3, 3)) assert layer.output_shape == (128, 16, 36, 36) def test_dimshuffle_false_shapes(self, DummyInputLayer): try: from lasagne.layers.cuda_convnet import Conv2DCCLayer except ImportError: pytest.skip("cuda_convnet not available") input_layer = DummyInputLayer((4, 32, 32, 128)) # c01b instead of bc01 layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3), dimshuffle=False) assert layer.W.get_value().shape == (4, 3, 3, 16) assert layer.b.get_value().shape == (16,) layer 
= Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3), dimshuffle=False, untie_biases=True) assert layer.W.get_value().shape == (4, 3, 3, 16) assert layer.b.get_value().shape == (16, 30, 30) def test_dimshuffle_false_get_output_for(self, DummyInputLayer): try: from lasagne.layers.cuda_convnet import Conv2DCCLayer except ImportError: pytest.skip("cuda_convnet not available") # this implementation is tested against FilterActs instead of # theano.tensor.nnet.conv.conv2d because using the latter leads to # numerical precision errors. from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs filter_acts = FilterActs(stride=1, pad=0, partial_sum=1) input = theano.shared(floatX(np.random.random((4, 5, 5, 8)))) kernel = theano.shared(floatX(np.random.random((4, 3, 3, 16)))) input_layer = DummyInputLayer((4, 5, 5, 8)) # c01b instead of bc01 layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3), dimshuffle=False, W=kernel, b=None, nonlinearity=None) output = np.array(filter_acts(input, kernel).eval()) actual = layer.get_output_for(input).eval() actual = np.array(actual) assert actual.shape == output.shape assert actual.shape == layer.output_shape assert np.allclose(actual, output) class TestShuffleLayers: def test_bc01_to_c01b(self): from lasagne.layers.input import InputLayer try: from lasagne.layers.cuda_convnet import ShuffleBC01ToC01BLayer except ImportError: pytest.skip("cuda_convnet not available") input_layer = InputLayer((1, 2, 3, 4)) layer = ShuffleBC01ToC01BLayer(input_layer) assert layer.output_shape == (2, 3, 4, 1) input = floatX(np.random.random((1, 2, 3, 4))) output = input.transpose(1, 2, 3, 0) actual = layer.get_output_for(theano.shared(input)).eval() assert np.allclose(output, actual) def test_c01b_to_bc01(self): from lasagne.layers.input import InputLayer try: from lasagne.layers.cuda_convnet import ShuffleC01BToBC01Layer except ImportError: pytest.skip("cuda_convnet not available") input_layer = InputLayer((1, 2, 3, 4)) layer = ShuffleC01BToBC01Layer(input_layer) assert layer.output_shape == (4, 1, 2, 3) input = floatX(np.random.random((1, 2, 3, 4))) output = input.transpose(3, 0, 1, 2) actual = layer.get_output_for(theano.shared(input)).eval() assert np.allclose(output, actual) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_recurrent.py0000644000175000017500000013520413307306052030561 0ustar sinclairssinclairsimport pytest from lasagne.layers import RecurrentLayer, LSTMLayer, CustomRecurrentLayer from lasagne.layers import InputLayer, DenseLayer, GRULayer, Gate, Layer from lasagne.layers import helper import theano import theano.tensor as T import numpy as np import lasagne from mock import Mock def test_recurrent_return_shape(): num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11 num_units = 6 x = T.tensor4() in_shp = (num_batch, seq_len, n_features1, n_features2) l_inp = InputLayer(in_shp) l_rec = RecurrentLayer(l_inp, num_units=num_units) x_in = np.random.random(in_shp).astype('float32') output = helper.get_output(l_rec, x) output_val = output.eval({x: x_in}) assert helper.get_output_shape(l_rec, x_in.shape) == output_val.shape assert output_val.shape == (num_batch, seq_len, num_units) @pytest.mark.parametrize('num_units', (6, 1)) def test_recurrent_grad(num_units): num_batch, seq_len, n_features = 5, 3, 10 l_inp = InputLayer((num_batch, seq_len, n_features)) l_rec = RecurrentLayer(l_inp, num_units=num_units) output = helper.get_output(l_rec) g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_rec)) 
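# T.grad returns one gradient expression per element of the `wrt` list, so
# `g` above is a Python list with one entry per trainable parameter of the
# recurrent layer (W_in_to_hid, W_hid_to_hid and b); the isinstance check
# below verifies exactly that. For reference, a minimal standalone NumPy
# sketch (illustrative only; the helper name is ad hoc, not part of Lasagne,
# and it assumes a tanh nonlinearity and a zero initial hidden state) of the
# recurrence RecurrentLayer computes, showing why the output shape checked
# further above is (num_batch, seq_len, num_units) and which parameters the
# gradient is taken with respect to.
import numpy as np

def _simple_rnn_forward(x, W_in, W_hid, b):
    """x: (batch, seq, features) -> (batch, seq, units)."""
    h = np.zeros((x.shape[0], W_hid.shape[0]))
    outputs = []
    for t in range(x.shape[1]):
        # one recurrence step: mix current input and previous hidden state
        h = np.tanh(x[:, t].dot(W_in) + h.dot(W_hid) + b)
        outputs.append(h)
    return np.stack(outputs, axis=1)

_rng = np.random.RandomState(0)
assert _simple_rnn_forward(_rng.rand(5, 3, 10), _rng.rand(10, 6),
                           _rng.rand(6, 6), _rng.rand(6)).shape == (5, 3, 6)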
assert isinstance(g, (list, tuple)) def test_recurrent_nparams(): l_inp = InputLayer((2, 2, 3)) l_rec = RecurrentLayer(l_inp, 5, learn_init=False, nonlinearity=None) # b, W_hid_to_hid and W_in_to_hid assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 3 # b + hid_init assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 2 def test_recurrent_nparams_learn_init(): l_inp = InputLayer((2, 2, 3)) l_rec = RecurrentLayer(l_inp, 5, learn_init=True) # b, W_hid_to_hid and W_in_to_hid + hid_init assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 4 # b + hid_init assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 2 def test_recurrent_hid_init_layer(): # test that you can set hid_init to be a layer l_inp = InputLayer((2, 2, 3)) l_inp_h = InputLayer((2, 5)) l_rec = RecurrentLayer(l_inp, 5, hid_init=l_inp_h) x = T.tensor3() h = T.matrix() output = lasagne.layers.get_output(l_rec, {l_inp: x, l_inp_h: h}) def test_recurrent_nparams_hid_init_layer(): # test that you can see layers through hid_init l_inp = InputLayer((2, 2, 3)) l_inp_h = InputLayer((2, 5)) l_inp_h_de = DenseLayer(l_inp_h, 7) l_rec = RecurrentLayer(l_inp, 7, hid_init=l_inp_h_de) # directly check the layers can be seen through hid_init assert lasagne.layers.get_all_layers(l_rec) == [l_inp, l_inp_h, l_inp_h_de, l_rec] # b, W_hid_to_hid and W_in_to_hid + W + b assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 5 # b (recurrent) + b (dense) assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 2 def test_recurrent_hid_init_mask(): # test that you can set hid_init to be a layer when a mask is provided l_inp = InputLayer((2, 2, 3)) l_inp_h = InputLayer((2, 5)) l_inp_msk = InputLayer((2, 2)) l_rec = RecurrentLayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk) x = T.tensor3() h = T.matrix() msk = T.matrix() inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk} output = lasagne.layers.get_output(l_rec, inputs) def test_recurrent_hid_init_layer_eval(): # Test `hid_init` as a `Layer` with some dummy input. 
Compare the output of # a network with a `Layer` as input to `hid_init` to a network with a # `np.array` as input to `hid_init` n_units = 7 n_test_cases = 2 in_shp = (n_test_cases, 2, 3) in_h_shp = (1, n_units) # dummy inputs X_test = np.ones(in_shp, dtype=theano.config.floatX) Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX) Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1)) # network with `Layer` initializer for hid_init l_inp = InputLayer(in_shp) l_inp_h = InputLayer(in_h_shp) l_rec_inp_layer = RecurrentLayer(l_inp, n_units, hid_init=l_inp_h, nonlinearity=None) # network with `np.array` initializer for hid_init l_rec_nparray = RecurrentLayer(l_inp, n_units, hid_init=Xh_test, nonlinearity=None) # copy network parameters from l_rec_inp_layer to l_rec_nparray l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()]) l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()]) for k, v in l_rn_param.items(): if k in l_il_param: v.set_value(l_il_param[k].get_value()) # build the theano functions X = T.tensor3() Xh = T.matrix() output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer, {l_inp: X, l_inp_h: Xh}) output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X}) # test both nets with dummy input output_val_inp_layer = output_inp_layer.eval({X: X_test, Xh: Xh_test_batch}) output_val_nparray = output_nparray.eval({X: X_test}) # check output given `Layer` is the same as with `np.array` assert np.allclose(output_val_inp_layer, output_val_nparray) def test_recurrent_incoming_tuple(): input_shape = (2, 3, 4) l_rec = lasagne.layers.RecurrentLayer(input_shape, 5) assert l_rec.input_shapes[0] == input_shape def test_recurrent_name(): l_in = lasagne.layers.InputLayer((2, 3, 4)) layer_name = 'l_rec' l_rec = lasagne.layers.RecurrentLayer(l_in, 4, name=layer_name) assert l_rec.b.name == layer_name + '.input_to_hidden.b' assert l_rec.W_in_to_hid.name == layer_name + '.input_to_hidden.W' assert l_rec.W_hid_to_hid.name == layer_name + '.hidden_to_hidden.W' def test_custom_recurrent_arbitrary_shape(): # Check that the custom recurrent layer can handle more than 1 feature dim n_batch, n_steps, n_channels, width, height = (2, 3, 4, 5, 6) n_out_filters = 7 filter_shape = (3, 3) l_in = lasagne.layers.InputLayer( (n_batch, n_steps, n_channels, width, height)) l_in_to_hid = lasagne.layers.Conv2DLayer( lasagne.layers.InputLayer((None, n_channels, width, height)), n_out_filters, filter_shape, pad='same') l_hid_to_hid = lasagne.layers.Conv2DLayer( lasagne.layers.InputLayer((None, n_out_filters, width, height)), n_out_filters, filter_shape, pad='same') l_rec = lasagne.layers.CustomRecurrentLayer( l_in, l_in_to_hid, l_hid_to_hid) assert l_rec.output_shape == (n_batch, n_steps, n_out_filters, width, height) out = theano.function([l_in.input_var], lasagne.layers.get_output(l_rec)) out_shape = out(np.zeros((n_batch, n_steps, n_channels, width, height), dtype=theano.config.floatX)).shape assert out_shape == (n_batch, n_steps, n_out_filters, width, height) def test_custom_recurrent_arbitrary_depth(): # Check that the custom recurrent layer can handle a hidden-to-hidden # network with an arbitrary depth n_batch, n_steps, n_channels, width, height = (2, 3, 4, 5, 6) n_out_filters = 7 n_in_hid_filters_0 = 11 n_hid_hid_filters_0 = 13 filter_shape = (3, 3) l_in = lasagne.layers.InputLayer( (n_batch, n_steps, n_channels, width, height)) # Expect the output shape of `l_in` as input shape for input-to-hidden l_in_to_hid = lasagne.layers.InputLayer((None, n_channels, width, height)) # 
Two conv layers; first to `n_hid_filters_0` channels l_in_to_hid = lasagne.layers.Conv2DLayer( l_in_to_hid, n_in_hid_filters_0, filter_shape, pad='same') # then to `n_out_filters` channels l_in_to_hid = lasagne.layers.Conv2DLayer( l_in_to_hid, n_out_filters, filter_shape, pad='same') # Expect the output shape of `l_in_to_hid` as input shape for # hidden-to-hidden l_hid_to_hid = lasagne.layers.InputLayer((None, n_out_filters, width, height)) # Two conv layers; first to `n_hid_hid_filters_0` channels l_hid_to_hid = lasagne.layers.Conv2DLayer( l_hid_to_hid, n_hid_hid_filters_0, filter_shape, pad='same') # then to `n_out_filters` channels l_hid_to_hid = lasagne.layers.Conv2DLayer( l_hid_to_hid, n_out_filters, filter_shape, pad='same') l_rec = lasagne.layers.CustomRecurrentLayer( l_in, l_in_to_hid, l_hid_to_hid) assert l_rec.output_shape == (n_batch, n_steps, n_out_filters, width, height) out = theano.function([l_in.input_var], lasagne.layers.get_output(l_rec)) out_shape = out(np.zeros((n_batch, n_steps, n_channels, width, height), dtype=theano.config.floatX)).shape assert out_shape == (n_batch, n_steps, n_out_filters, width, height) def test_custom_recurrent_non_unique_inputs(): # Check that the custom recurrent layer constructor detects non-unique # input layers within the input-to-hidden and hidden-to-hidden graphs # and raises ValueError n_batch, n_steps, n_channels, width, height = (2, 3, 4, 5, 6) n_out_filters = 7 n_in_hid_filters_0 = 11 n_hid_hid_filters_0 = 13 filter_shape = (3, 3) l_in = lasagne.layers.InputLayer( (n_batch, n_steps, n_channels, width, height)) # Bad input-to-hidden graph with multiple input layers # Expect the output shape of `l_in` as input shape for input-to-hidden l_in_to_hid_bad_0 = lasagne.layers.InputLayer( (None, n_channels, width, height)) l_in_to_hid_bad_1 = lasagne.layers.InputLayer( (None, n_channels, width, height)) l_in_to_hid_bad = lasagne.layers.ConcatLayer( [l_in_to_hid_bad_0, l_in_to_hid_bad_1], axis=1) # Two conv layers; first to `n_hid_filters_0` channels l_in_to_hid_bad = lasagne.layers.Conv2DLayer( l_in_to_hid_bad, n_in_hid_filters_0, filter_shape, pad='same') # then to `n_out_filters` channels l_in_to_hid_bad = lasagne.layers.Conv2DLayer( l_in_to_hid_bad, n_out_filters, filter_shape, pad='same') # Expect the output shape of `l_in` as input shape for input-to-hidden l_in_to_hid = lasagne.layers.InputLayer((None, n_channels, width, height)) # Two conv layers; first to `n_hid_filters_0` channels l_in_to_hid = lasagne.layers.Conv2DLayer( l_in_to_hid, n_in_hid_filters_0, filter_shape, pad='same') # then to `n_out_filters` channels l_in_to_hid = lasagne.layers.Conv2DLayer( l_in_to_hid, n_out_filters, filter_shape, pad='same') # Bad hidden-to-hidden graph with multiple input layers # Expect the output shape of `l_in_to_hid` as input shape for # hidden-to-hidden l_hid_to_hid_bad_0 = lasagne.layers.InputLayer( (None, n_out_filters, width, height)) l_hid_to_hid_bad_1 = lasagne.layers.InputLayer( (None, n_out_filters, width, height)) l_hid_to_hid_bad = lasagne.layers.ConcatLayer( [l_hid_to_hid_bad_0, l_hid_to_hid_bad_1], axis=1) # Two conv layers; first to `n_hid_hid_filters_0` channels l_hid_to_hid_bad = lasagne.layers.Conv2DLayer( l_hid_to_hid_bad, n_hid_hid_filters_0, filter_shape, pad='same') # then to `n_out_filters` channels l_hid_to_hid_bad = lasagne.layers.Conv2DLayer( l_hid_to_hid_bad, n_out_filters, filter_shape, pad='same') # Expect the output shape of `l_in_to_hid` as input shape for # hidden-to-hidden l_hid_to_hid = 
lasagne.layers.InputLayer((None, n_out_filters, width, height)) # Two conv layers; first to `n_hid_hid_filters_0` channels l_hid_to_hid = lasagne.layers.Conv2DLayer( l_hid_to_hid, n_hid_hid_filters_0, filter_shape, pad='same') # then to `n_out_filters` channels l_hid_to_hid = lasagne.layers.Conv2DLayer( l_hid_to_hid, n_out_filters, filter_shape, pad='same') # Ensure that trying to use either 'bad' graph raises ValueError with pytest.raises(ValueError): l_rec = lasagne.layers.CustomRecurrentLayer( l_in, l_in_to_hid_bad, l_hid_to_hid) with pytest.raises(ValueError): l_rec = lasagne.layers.CustomRecurrentLayer( l_in, l_in_to_hid, l_hid_to_hid_bad) with pytest.raises(ValueError): l_rec = lasagne.layers.CustomRecurrentLayer( l_in, l_in_to_hid_bad, l_hid_to_hid_bad) l_rec = lasagne.layers.CustomRecurrentLayer( l_in, l_in_to_hid, l_hid_to_hid) def test_custom_recurrent_init_shape_error(): # Check that the custom recurrent layer throws errors for invalid shapes n_batch, n_steps, n_channels, width, height = (2, 3, 4, 5, 6) n_out_filters = 7 filter_shape = (3, 3) l_in = lasagne.layers.InputLayer( (n_batch, n_steps, n_channels, width, height)) l_hid_to_hid = lasagne.layers.Conv2DLayer( lasagne.layers.InputLayer((n_batch, n_out_filters, width, height)), n_out_filters, filter_shape, pad='same') # When precompute_input == True, input_to_hidden.shape[0] must be None # or n_batch*n_steps l_in_to_hid = lasagne.layers.Conv2DLayer( lasagne.layers.InputLayer((n_batch, n_channels, width, height)), n_out_filters, filter_shape, pad='same') with pytest.raises(ValueError): l_rec = lasagne.layers.CustomRecurrentLayer( l_in, l_in_to_hid, l_hid_to_hid, precompute_input=True) # When precompute_input = False, input_to_hidden.shape[1] must be None # or hidden_to_hidden.shape[1] l_in_to_hid = lasagne.layers.Conv2DLayer( lasagne.layers.InputLayer((n_batch + 1, n_channels, width, height)), n_out_filters, filter_shape, pad='same') with pytest.raises(ValueError): l_rec = lasagne.layers.CustomRecurrentLayer( l_in, l_in_to_hid, l_hid_to_hid, precompute_input=False) # In any case, input_to_hidden and hidden_to_hidden's output shapes after # the first dimension must match l_in_to_hid = lasagne.layers.Conv2DLayer( lasagne.layers.InputLayer((None, n_channels, width + 1, height)), n_out_filters, filter_shape, pad='same') with pytest.raises(ValueError): l_rec = lasagne.layers.CustomRecurrentLayer( l_in, l_in_to_hid, l_hid_to_hid) # And, the output shape of input_to_hidden must match the input shape # of hidden_to_hidden past the first dimension. By not using padding, # the output of l_in_to_hid will be cropped, which will make the # shape inappropriate. 
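# A minimal sketch of the shape arithmetic behind the comment above (the
# helper below is ad hoc, not part of Lasagne's API): with stride 1 and no
# padding, a 'valid' convolution shrinks each spatial dimension by
# (filter_size - 1), so the unpadded input-to-hidden output constructed
# below can no longer match the spatial size that hidden_to_hidden expects.
def _valid_conv_output_length(input_length, filter_length, stride=1):
    # output length of an unpadded ('valid') convolution
    return (input_length - filter_length) // stride + 1

assert _valid_conv_output_length(5, 3) == 3   # width 5 shrinks to 3
assert _valid_conv_output_length(6, 3) == 4   # height 6 shrinks to 4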
l_in_to_hid = lasagne.layers.Conv2DLayer( lasagne.layers.InputLayer((None, n_channels, width, height)), n_out_filters, filter_shape) l_hid_to_hid = lasagne.layers.Conv2DLayer( lasagne.layers.InputLayer((n_batch, n_out_filters, width, height)), n_out_filters, filter_shape) with pytest.raises(ValueError): l_rec = lasagne.layers.CustomRecurrentLayer( l_in, l_in_to_hid, l_hid_to_hid) def test_recurrent_grad_clipping(): num_units = 5 batch_size = 3 seq_len = 2 n_inputs = 4 in_shp = (batch_size, seq_len, n_inputs) l_inp = InputLayer(in_shp) x = T.tensor3() l_rec = RecurrentLayer(l_inp, num_units, grad_clipping=1.0) output = lasagne.layers.get_output(l_rec, x) def test_recurrent_bck(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 x = T.tensor3() in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) x_in = np.ones(in_shp).astype('float32') # need to set random seed. lasagne.random.get_rng().seed(1234) l_rec_fwd = RecurrentLayer(l_inp, num_units=num_units, backwards=False) lasagne.random.get_rng().seed(1234) l_rec_bck = RecurrentLayer(l_inp, num_units=num_units, backwards=True) l_out_fwd = helper.get_output(l_rec_fwd, x) l_out_bck = helper.get_output(l_rec_bck, x) output_fwd = l_out_fwd.eval({l_out_fwd: x_in}) output_bck = l_out_bck.eval({l_out_bck: x_in}) # test that the backwards model reverses its final input np.testing.assert_almost_equal(output_fwd, output_bck[:, ::-1]) def test_recurrent_variable_input_size(): # check that seqlen and batchsize None works num_batch, n_features1 = 6, 5 num_units = 13 x = T.tensor3() in_shp = (None, None, n_features1) l_inp = InputLayer(in_shp) x_in1 = np.ones((num_batch+1, 10, n_features1)).astype('float32') x_in2 = np.ones((num_batch, 15, n_features1)).astype('float32') l_rec = RecurrentLayer(l_inp, num_units=num_units, backwards=False) output = helper.get_output(l_rec, x) output_val1 = output.eval({x: x_in1}) output_val2 = output.eval({x: x_in2}) def test_recurrent_unroll_scan_fwd(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) l_mask_inp = InputLayer(in_shp[:2]) x_in = np.random.random(in_shp).astype('float32') mask_in = np.ones(in_shp[:2]).astype('float32') # need to set random seed. lasagne.random.get_rng().seed(1234) l_rec_scan = RecurrentLayer(l_inp, num_units=num_units, backwards=False, unroll_scan=False, mask_input=l_mask_inp) lasagne.random.get_rng().seed(1234) l_rec_unroll = RecurrentLayer(l_inp, num_units=num_units, backwards=False, unroll_scan=True, mask_input=l_mask_inp) output_scan = helper.get_output(l_rec_scan) output_unrolled = helper.get_output(l_rec_unroll) output_scan_val = output_scan.eval( {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) output_unrolled_val = output_unrolled.eval( {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) np.testing.assert_almost_equal(output_scan_val, output_unrolled_val) def test_recurrent_unroll_scan_bck(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 x = T.tensor3() in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) x_in = np.random.random(in_shp).astype('float32') # need to set random seed. 
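# Re-seeding Lasagne's global RNG before constructing each layer gives both
# layers identical initial weights, so any difference in their outputs is
# attributable to the backwards flag alone. For the all-ones input used
# here, the backwards layer's output is simply the forward output with the
# time axis flipped, which is what the assertion at the end of this test
# checks.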
lasagne.random.get_rng().seed(1234) l_rec_scan = RecurrentLayer(l_inp, num_units=num_units, backwards=True, unroll_scan=False) lasagne.random.get_rng().seed(1234) l_rec_unroll = RecurrentLayer(l_inp, num_units=num_units, backwards=True, unroll_scan=True) output_scan = helper.get_output(l_rec_scan, x) output_unrolled = helper.get_output(l_rec_unroll, x) output_scan_val = output_scan.eval({x: x_in}) output_unrolled_val = output_unrolled.eval({x: x_in}) np.testing.assert_almost_equal(output_scan_val, output_unrolled_val) def test_recurrent_precompute(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) l_mask_inp = InputLayer(in_shp[:2]) x_in = np.random.random(in_shp).astype('float32') mask_in = np.ones((num_batch, seq_len), dtype='float32') # need to set random seed. lasagne.random.get_rng().seed(1234) l_rec_precompute = RecurrentLayer(l_inp, num_units=num_units, precompute_input=True, mask_input=l_mask_inp) lasagne.random.get_rng().seed(1234) l_rec_no_precompute = RecurrentLayer(l_inp, num_units=num_units, precompute_input=False, mask_input=l_mask_inp) output_precompute = helper.get_output( l_rec_precompute).eval({l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) output_no_precompute = helper.get_output( l_rec_no_precompute).eval({l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) np.testing.assert_almost_equal(output_precompute, output_no_precompute) def test_recurrent_return_final(): num_batch, seq_len, n_features = 2, 3, 4 num_units = 2 in_shp = (num_batch, seq_len, n_features) x_in = np.random.random(in_shp).astype('float32') l_inp = InputLayer(in_shp) lasagne.random.get_rng().seed(1234) l_rec_final = RecurrentLayer(l_inp, num_units, only_return_final=True) lasagne.random.get_rng().seed(1234) l_rec_all = RecurrentLayer(l_inp, num_units, only_return_final=False) output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in}) output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in}) assert output_final.shape == (output_all.shape[0], output_all.shape[2]) assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final) assert np.allclose(output_final, output_all[:, -1]) def test_lstm_return_shape(): num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11 num_units = 6 x = T.tensor4() in_shp = (num_batch, seq_len, n_features1, n_features2) l_inp = InputLayer(in_shp) x_in = np.random.random(in_shp).astype('float32') l_lstm = LSTMLayer(l_inp, num_units=num_units) output = helper.get_output(l_lstm, x) output_val = output.eval({x: x_in}) assert helper.get_output_shape(l_lstm, x_in.shape) == output_val.shape assert output_val.shape == (num_batch, seq_len, num_units) @pytest.mark.parametrize('num_units', (6, 1)) def test_lstm_grad(num_units): num_batch, seq_len, n_features = 5, 3, 10 l_inp = InputLayer((num_batch, seq_len, n_features)) l_lstm = LSTMLayer(l_inp, num_units=num_units) output = helper.get_output(l_lstm) g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_lstm)) assert isinstance(g, (list, tuple)) def test_lstm_nparams_no_peepholes(): l_inp = InputLayer((2, 2, 3)) l_lstm = LSTMLayer(l_inp, 5, peepholes=False, learn_init=False) # 3*n_gates # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 12 # bias params + init params assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 6 def test_lstm_nparams_peepholes(): l_inp = InputLayer((2, 2, 3)) l_lstm = 
LSTMLayer(l_inp, 5, peepholes=True, learn_init=False) # 3*n_gates + peepholes(3). # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 15 # bias params(4) + init params(2) assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 6 def test_lstm_nparams_learn_init(): l_inp = InputLayer((2, 2, 3)) l_lstm = LSTMLayer(l_inp, 5, peepholes=False, learn_init=True) # 3*n_gates + inits(2). # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 14 # bias params(4) + init params(2) assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 6 def test_lstm_hid_init_layer(): # test that you can set hid_init to be a layer l_inp = InputLayer((2, 2, 3)) l_inp_h = InputLayer((2, 5)) l_cell_h = InputLayer((2, 5)) l_lstm = LSTMLayer(l_inp, 5, hid_init=l_inp_h, cell_init=l_cell_h) x = T.tensor3() h = T.matrix() output = lasagne.layers.get_output(l_lstm, {l_inp: x, l_inp_h: h}) def test_lstm_nparams_hid_init_layer(): # test that you can see layers through hid_init l_inp = InputLayer((2, 2, 3)) l_inp_h = InputLayer((2, 5)) l_inp_h_de = DenseLayer(l_inp_h, 7) l_inp_cell = InputLayer((2, 5)) l_inp_cell_de = DenseLayer(l_inp_cell, 7) l_lstm = LSTMLayer(l_inp, 7, hid_init=l_inp_h_de, cell_init=l_inp_cell_de) # directly check the layers can be seen through hid_init layers_to_find = [l_inp, l_inp_h, l_inp_h_de, l_inp_cell, l_inp_cell_de, l_lstm] assert lasagne.layers.get_all_layers(l_lstm) == layers_to_find # 3*n_gates + 4 # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate # 4 is for the W and b parameters in the two DenseLayer layers assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 19 # GRU bias params(3) + Dense bias params(1) * 2 assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 6 def test_lstm_hid_init_mask(): # test that you can set hid_init to be a layer when a mask is provided l_inp = InputLayer((2, 2, 3)) l_inp_h = InputLayer((2, 5)) l_inp_msk = InputLayer((2, 2)) l_cell_h = InputLayer((2, 5)) l_lstm = LSTMLayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk, cell_init=l_cell_h) x = T.tensor3() h = T.matrix() msk = T.matrix() inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk} output = lasagne.layers.get_output(l_lstm, inputs) def test_lstm_hid_init_layer_eval(): # Test `hid_init` as a `Layer` with some dummy input. 
Compare the output of # a network with a `Layer` as input to `hid_init` to a network with a # `np.array` as input to `hid_init` n_units = 7 n_test_cases = 2 in_shp = (n_test_cases, 2, 3) in_h_shp = (1, n_units) in_cell_shp = (1, n_units) # dummy inputs X_test = np.ones(in_shp, dtype=theano.config.floatX) Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX) Xc_test = np.ones(in_cell_shp, dtype=theano.config.floatX) Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1)) Xc_test_batch = np.tile(Xc_test, (n_test_cases, 1)) # network with `Layer` initializer for hid_init l_inp = InputLayer(in_shp) l_inp_h = InputLayer(in_h_shp) l_inp_cell = InputLayer(in_cell_shp) l_rec_inp_layer = LSTMLayer(l_inp, n_units, hid_init=l_inp_h, cell_init=l_inp_cell, nonlinearity=None) # network with `np.array` initializer for hid_init l_rec_nparray = LSTMLayer(l_inp, n_units, hid_init=Xh_test, cell_init=Xc_test, nonlinearity=None) # copy network parameters from l_rec_inp_layer to l_rec_nparray l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()]) l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()]) for k, v in l_rn_param.items(): if k in l_il_param: v.set_value(l_il_param[k].get_value()) # build the theano functions X = T.tensor3() Xh = T.matrix() Xc = T.matrix() output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer, {l_inp: X, l_inp_h: Xh, l_inp_cell: Xc}) output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X}) # test both nets with dummy input output_val_inp_layer = output_inp_layer.eval({X: X_test, Xh: Xh_test_batch, Xc: Xc_test_batch}) output_val_nparray = output_nparray.eval({X: X_test}) # check output given `Layer` is the same as with `np.array` assert np.allclose(output_val_inp_layer, output_val_nparray) def test_lstm_grad_clipping(): # test that you can set grad_clip variable x = T.tensor3() l_rec = LSTMLayer(InputLayer((2, 2, 3)), 5, grad_clipping=1) output = lasagne.layers.get_output(l_rec, x) def test_lstm_bck(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 x = T.tensor3() in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) x_in = np.ones(in_shp).astype('float32') # need to set random seed. lasagne.random.get_rng().seed(1234) l_lstm_fwd = LSTMLayer(l_inp, num_units=num_units, backwards=False) lasagne.random.get_rng().seed(1234) l_lstm_bck = LSTMLayer(l_inp, num_units=num_units, backwards=True) output_fwd = helper.get_output(l_lstm_fwd, x) output_bck = helper.get_output(l_lstm_bck, x) output_fwd_val = output_fwd.eval({x: x_in}) output_bck_val = output_bck.eval({x: x_in}) # test that the backwards model reverses its final input np.testing.assert_almost_equal(output_fwd_val, output_bck_val[:, ::-1]) def test_lstm_precompute(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) l_mask_inp = InputLayer(in_shp[:2]) x_in = np.random.random(in_shp).astype('float32') mask_in = np.ones((num_batch, seq_len), dtype='float32') # need to set random seed. 
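# A minimal standalone NumPy sketch (illustrative only; it ignores biases
# and gating and looks at a single weight matrix) of what
# precompute_input=True does: the per-timestep products with W_in can be
# folded into one large dot over the flattened (batch * seq, features)
# matrix before the scan loop, which is mathematically identical to doing a
# dot at every step, as verified below.
import numpy as np

_rng = np.random.RandomState(0)
_x = _rng.rand(2, 3, 4)                      # (batch, seq, features)
_W_in = _rng.rand(4, 5)                      # (features, units)
_per_step = np.stack([_x[:, t].dot(_W_in) for t in range(_x.shape[1])],
                     axis=1)
_precomputed = _x.reshape(-1, 4).dot(_W_in).reshape(2, 3, 5)
assert np.allclose(_per_step, _precomputed)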
lasagne.random.get_rng().seed(1234) l_lstm_precompute = LSTMLayer( l_inp, num_units=num_units, precompute_input=True, mask_input=l_mask_inp) lasagne.random.get_rng().seed(1234) l_lstm_no_precompute = LSTMLayer( l_inp, num_units=num_units, precompute_input=False, mask_input=l_mask_inp) output_precompute = helper.get_output( l_lstm_precompute).eval({l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) output_no_precompute = helper.get_output( l_lstm_no_precompute).eval({l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) # test that the backwards model reverses its final input np.testing.assert_almost_equal(output_precompute, output_no_precompute) def test_lstm_variable_input_size(): # that seqlen and batchsize None works num_batch, n_features1 = 6, 5 num_units = 13 x = T.tensor3() in_shp = (None, None, n_features1) l_inp = InputLayer(in_shp) x_in1 = np.ones((num_batch+1, 3+1, n_features1)).astype('float32') x_in2 = np.ones((num_batch, 3, n_features1)).astype('float32') l_rec = LSTMLayer(l_inp, num_units=num_units, backwards=False) output = helper.get_output(l_rec, x) output_val1 = output.eval({x: x_in1}) output_val2 = output.eval({x: x_in2}) def test_lstm_unroll_scan_fwd(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) l_mask_inp = InputLayer(in_shp[:2]) x_in = np.random.random(in_shp).astype('float32') mask_in = np.ones(in_shp[:2]).astype('float32') # need to set random seed. lasagne.random.get_rng().seed(1234) l_lstm_scan = LSTMLayer(l_inp, num_units=num_units, backwards=False, unroll_scan=False, mask_input=l_mask_inp) lasagne.random.get_rng().seed(1234) l_lstm_unrolled = LSTMLayer(l_inp, num_units=num_units, backwards=False, unroll_scan=True, mask_input=l_mask_inp) output_scan = helper.get_output(l_lstm_scan) output_unrolled = helper.get_output(l_lstm_unrolled) output_scan_val = output_scan.eval({l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) output_unrolled_val = output_unrolled.eval({l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) np.testing.assert_almost_equal(output_scan_val, output_unrolled_val) def test_lstm_unroll_scan_bck(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 x = T.tensor3() in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) x_in = np.random.random(in_shp).astype('float32') # need to set random seed. lasagne.random.get_rng().seed(1234) l_lstm_scan = LSTMLayer(l_inp, num_units=num_units, backwards=True, unroll_scan=False) lasagne.random.get_rng().seed(1234) l_lstm_unrolled = LSTMLayer(l_inp, num_units=num_units, backwards=True, unroll_scan=True) output_scan = helper.get_output(l_lstm_scan, x) output_scan_unrolled = helper.get_output(l_lstm_unrolled, x) output_scan_val = output_scan.eval({x: x_in}) output_unrolled_val = output_scan_unrolled.eval({x: x_in}) np.testing.assert_almost_equal(output_scan_val, output_unrolled_val) def test_lstm_passthrough(): # Tests that the LSTM can simply pass through its input l_in = InputLayer((4, 5, 6)) zero = lasagne.init.Constant(0.) one = lasagne.init.Constant(1.) 
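# Gate takes (W_in, W_hid, W_cell, b, nonlinearity). With all weights zero,
# a bias of one and an identity nonlinearity (nonlinearity=None), a gate is
# constantly 1 (fully open); with a zero bias it is constantly 0 (closed).
# Using an identity matrix for W_in in the cell-input "gate" makes the cell
# candidate equal to the current input. With the input and output gates
# open and the forget gate closed, the cell state becomes exactly x_t and
# the LSTM reproduces its input, which is what this test asserts.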
pass_gate = Gate(zero, zero, zero, one, None) no_gate = Gate(zero, zero, zero, zero, None) in_pass_gate = Gate( np.eye(6).astype(theano.config.floatX), zero, zero, zero, None) l_rec = LSTMLayer( l_in, 6, pass_gate, no_gate, in_pass_gate, pass_gate, None) out = lasagne.layers.get_output(l_rec) inp = np.arange(4*5*6).reshape(4, 5, 6).astype(theano.config.floatX) np.testing.assert_almost_equal(out.eval({l_in.input_var: inp}), inp) def test_lstm_return_final(): num_batch, seq_len, n_features = 2, 3, 4 num_units = 2 in_shp = (num_batch, seq_len, n_features) x_in = np.random.random(in_shp).astype('float32') l_inp = InputLayer(in_shp) lasagne.random.get_rng().seed(1234) l_rec_final = LSTMLayer(l_inp, num_units, only_return_final=True) lasagne.random.get_rng().seed(1234) l_rec_all = LSTMLayer(l_inp, num_units, only_return_final=False) output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in}) output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in}) assert output_final.shape == (output_all.shape[0], output_all.shape[2]) assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final) assert np.allclose(output_final, output_all[:, -1]) def test_gru_return_shape(): num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11 num_units = 6 x = T.tensor4() in_shp = (num_batch, seq_len, n_features1, n_features2) l_inp = InputLayer(in_shp) l_rec = GRULayer(l_inp, num_units=num_units) x_in = np.random.random(in_shp).astype('float32') output = helper.get_output(l_rec, x) output_val = output.eval({x: x_in}) assert helper.get_output_shape(l_rec, x_in.shape) == output_val.shape assert output_val.shape == (num_batch, seq_len, num_units) @pytest.mark.parametrize('num_units', (6, 1)) def test_gru_grad(num_units): num_batch, seq_len, n_features = 5, 3, 10 l_inp = InputLayer((num_batch, seq_len, n_features)) l_gru = GRULayer(l_inp, num_units=num_units) output = helper.get_output(l_gru) g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_gru)) assert isinstance(g, (list, tuple)) def test_gru_nparams_learn_init_false(): l_inp = InputLayer((2, 2, 3)) l_gru = GRULayer(l_inp, 5, learn_init=False) # 3*n_gates # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 9 # bias params(3) + hid_init assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 4 def test_gru_nparams_learn_init_true(): l_inp = InputLayer((2, 2, 3)) l_gru = GRULayer(l_inp, 5, learn_init=True) # 3*n_gates + hid_init # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 10 # bias params(3) + init params(1) assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 4 def test_gru_hid_init_layer(): # test that you can set hid_init to be a layer l_inp = InputLayer((2, 2, 3)) l_inp_h = InputLayer((2, 5)) l_gru = GRULayer(l_inp, 5, hid_init=l_inp_h) x = T.tensor3() h = T.matrix() output = lasagne.layers.get_output(l_gru, {l_inp: x, l_inp_h: h}) def test_gru_nparams_hid_init_layer(): # test that you can see layers through hid_init l_inp = InputLayer((2, 2, 3)) l_inp_h = InputLayer((2, 5)) l_inp_h_de = DenseLayer(l_inp_h, 7) l_gru = GRULayer(l_inp, 7, hid_init=l_inp_h_de) # directly check the layers can be seen through hid_init assert lasagne.layers.get_all_layers(l_gru) == [l_inp, l_inp_h, l_inp_h_de, l_gru] # 3*n_gates + 2 # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate # 2 is for the 
W and b parameters in the DenseLayer assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 11 # GRU bias params(3) + Dense bias params(1) assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 4 def test_gru_hid_init_layer_eval(): # Test `hid_init` as a `Layer` with some dummy input. Compare the output of # a network with a `Layer` as input to `hid_init` to a network with a # `np.array` as input to `hid_init` n_units = 7 n_test_cases = 2 in_shp = (n_test_cases, 2, 3) in_h_shp = (1, n_units) # dummy inputs X_test = np.ones(in_shp, dtype=theano.config.floatX) Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX) Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1)) # network with `Layer` initializer for hid_init l_inp = InputLayer(in_shp) l_inp_h = InputLayer(in_h_shp) l_rec_inp_layer = GRULayer(l_inp, n_units, hid_init=l_inp_h) # network with `np.array` initializer for hid_init l_rec_nparray = GRULayer(l_inp, n_units, hid_init=Xh_test) # copy network parameters from l_rec_inp_layer to l_rec_nparray l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()]) l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()]) for k, v in l_rn_param.items(): if k in l_il_param: v.set_value(l_il_param[k].get_value()) # build the theano functions X = T.tensor3() Xh = T.matrix() output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer, {l_inp: X, l_inp_h: Xh}) output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X}) # test both nets with dummy input output_val_inp_layer = output_inp_layer.eval({X: X_test, Xh: Xh_test_batch}) output_val_nparray = output_nparray.eval({X: X_test}) # check output given `Layer` is the same as with `np.array` assert np.allclose(output_val_inp_layer, output_val_nparray) def test_gru_hid_init_mask(): # test that you can set hid_init to be a layer when a mask is provided l_inp = InputLayer((2, 2, 3)) l_inp_h = InputLayer((2, 5)) l_inp_msk = InputLayer((2, 2)) l_gru = GRULayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk) x = T.tensor3() h = T.matrix() msk = T.matrix() inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk} output = lasagne.layers.get_output(l_gru, inputs) def test_gru_grad_clipping(): # test that you can set grad_clip variable x = T.tensor3() l_rec = GRULayer(InputLayer((2, 2, 3)), 5, grad_clipping=1) output = lasagne.layers.get_output(l_rec, x) def test_gru_bck(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 x = T.tensor3() in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) x_in = np.ones(in_shp).astype('float32') # need to set random seed. 
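# A small standalone sketch of the gradient-clipping primitive that the
# grad_clipping option exercised above is understood to rely on
# (theano.gradient.grad_clip): the forward value is unchanged, but the
# gradient flowing back through the wrapped expression is clipped to the
# given range.
import theano
import theano.tensor as T

_w = T.dscalar()
_y = theano.gradient.grad_clip(_w, -1.0, 1.0) ** 2
_g = T.grad(_y, _w)
# d(w**2)/dw at w=10 is 20, but the clipped gradient is 1
assert abs(_g.eval({_w: 10.0}) - 1.0) < 1e-6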
lasagne.random.get_rng().seed(1234) l_gru_fwd = GRULayer(l_inp, num_units=num_units, backwards=False) lasagne.random.get_rng().seed(1234) l_gru_bck = GRULayer(l_inp, num_units=num_units, backwards=True) output_fwd = helper.get_output(l_gru_fwd, x) output_bck = helper.get_output(l_gru_bck, x) output_fwd_val = output_fwd.eval({x: x_in}) output_bck_val = output_bck.eval({x: x_in}) # test that the backwards model reverses its final input np.testing.assert_almost_equal(output_fwd_val, output_bck_val[:, ::-1]) def test_gru_variable_input_size(): # that seqlen and batchsize None works num_batch, n_features1 = 6, 5 num_units = 13 x = T.tensor3() in_shp = (None, None, n_features1) l_inp = InputLayer(in_shp) x_in1 = np.ones((num_batch+1, 10, n_features1)).astype('float32') x_in2 = np.ones((num_batch, 15, n_features1)).astype('float32') l_rec = GRULayer(l_inp, num_units=num_units, backwards=False) output = helper.get_output(l_rec, x) output.eval({x: x_in1}) output.eval({x: x_in2}) def test_gru_unroll_scan_fwd(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) l_mask_inp = InputLayer(in_shp[:2]) x_in = np.random.random(in_shp).astype('float32') mask_in = np.ones(in_shp[:2]).astype('float32') # need to set random seed. lasagne.random.get_rng().seed(1234) l_gru_scan = GRULayer(l_inp, num_units=num_units, backwards=False, unroll_scan=False, mask_input=l_mask_inp) lasagne.random.get_rng().seed(1234) l_gru_unrolled = GRULayer(l_inp, num_units=num_units, backwards=False, unroll_scan=True, mask_input=l_mask_inp) output_scan = helper.get_output(l_gru_scan) output_unrolled = helper.get_output(l_gru_unrolled) output_scan_val = output_scan.eval({l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) output_unrolled_val = output_unrolled.eval({l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) np.testing.assert_almost_equal(output_scan_val, output_unrolled_val) def test_gru_unroll_scan_bck(): num_batch, seq_len, n_features1 = 2, 5, 4 num_units = 2 x = T.tensor3() in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) x_in = np.random.random(in_shp).astype('float32') # need to set random seed. lasagne.random.get_rng().seed(1234) l_gru_scan = GRULayer(l_inp, num_units=num_units, backwards=True, unroll_scan=False) lasagne.random.get_rng().seed(1234) l_gru_unrolled = GRULayer(l_inp, num_units=num_units, backwards=True, unroll_scan=True) output_scan = helper.get_output(l_gru_scan, x) output_unrolled = helper.get_output(l_gru_unrolled, x) output_scan_val = output_scan.eval({x: x_in}) output_unrolled_val = output_unrolled.eval({x: x_in}) np.testing.assert_almost_equal(output_scan_val, output_unrolled_val) def test_gru_precompute(): num_batch, seq_len, n_features1 = 2, 3, 4 num_units = 2 in_shp = (num_batch, seq_len, n_features1) l_inp = InputLayer(in_shp) l_mask_inp = InputLayer(in_shp[:2]) x_in = np.random.random(in_shp).astype('float32') mask_in = np.ones((num_batch, seq_len), dtype='float32') # need to set random seed. 
lasagne.random.get_rng().seed(1234) l_gru_precompute = GRULayer(l_inp, num_units=num_units, precompute_input=True, mask_input=l_mask_inp) lasagne.random.get_rng().seed(1234) l_gru_no_precompute = GRULayer(l_inp, num_units=num_units, precompute_input=False, mask_input=l_mask_inp) output_precompute = helper.get_output( l_gru_precompute).eval({l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) output_no_precompute = helper.get_output( l_gru_no_precompute).eval({l_inp.input_var: x_in, l_mask_inp.input_var: mask_in}) # test that the backwards model reverses its final input np.testing.assert_almost_equal(output_precompute, output_no_precompute) def test_gru_passthrough(): # Tests that the LSTM can simply pass through its input l_in = InputLayer((4, 5, 6)) zero = lasagne.init.Constant(0.) one = lasagne.init.Constant(1.) pass_gate = Gate(zero, zero, None, one, None) no_gate = Gate(zero, zero, None, zero, None) in_pass_gate = Gate( np.eye(6).astype(theano.config.floatX), zero, None, zero, None) l_rec = GRULayer(l_in, 6, no_gate, pass_gate, in_pass_gate) out = lasagne.layers.get_output(l_rec) inp = np.arange(4*5*6).reshape(4, 5, 6).astype(theano.config.floatX) np.testing.assert_almost_equal(out.eval({l_in.input_var: inp}), inp) def test_gru_return_final(): num_batch, seq_len, n_features = 2, 3, 4 num_units = 2 in_shp = (num_batch, seq_len, n_features) x_in = np.random.random(in_shp).astype('float32') l_inp = InputLayer(in_shp) lasagne.random.get_rng().seed(1234) l_rec_final = GRULayer(l_inp, num_units, only_return_final=True) lasagne.random.get_rng().seed(1234) l_rec_all = GRULayer(l_inp, num_units, only_return_final=False) output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in}) output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in}) assert output_final.shape == (output_all.shape[0], output_all.shape[2]) assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final) assert np.allclose(output_final, output_all[:, -1]) def test_gradient_steps_error(): # Check that error is raised if gradient_steps is not -1 and scan_unroll # is true l_in = InputLayer((2, 2, 3)) with pytest.raises(ValueError): RecurrentLayer(l_in, 5, gradient_steps=3, unroll_scan=True) with pytest.raises(ValueError): LSTMLayer(l_in, 5, gradient_steps=3, unroll_scan=True) with pytest.raises(ValueError): GRULayer(l_in, 5, gradient_steps=3, unroll_scan=True) def test_unroll_none_input_error(): # Test that a ValueError is raised if unroll scan is True and the input # sequence length is specified as None. 
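# Unrolling replaces the symbolic scan loop with one copy of the step graph
# per time step, so the number of steps must be a concrete integer when the
# layer is constructed; with an input shape of (2, None, 3) the sequence
# length is unknown, hence the ValueError.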
l_in = InputLayer((2, None, 3)) with pytest.raises(ValueError): RecurrentLayer(l_in, 5, unroll_scan=True) with pytest.raises(ValueError): LSTMLayer(l_in, 5, unroll_scan=True) with pytest.raises(ValueError): GRULayer(l_in, 5, unroll_scan=True) def test_CustomRecurrentLayer_child_kwargs(): in_shape = (2, 3, 4) n_hid = 5 # Construct mock for input-to-hidden layer in_to_hid = Mock( Layer, output_shape=(in_shape[0]*in_shape[1], n_hid), input_shape=(in_shape[0]*in_shape[1], in_shape[2]), input_layer=InputLayer((in_shape[0]*in_shape[1], in_shape[2])), get_output_kwargs=['foo']) # These two functions get called, need to return dummy values for them in_to_hid.get_output_for.return_value = T.matrix() in_to_hid.get_params.return_value = [] # As above, for hidden-to-hidden layer hid_to_hid = Mock( Layer, output_shape=(in_shape[0], n_hid), input_shape=(in_shape[0], n_hid), input_layer=InputLayer((in_shape[0], n_hid)), get_output_kwargs=[]) hid_to_hid.get_output_for.return_value = T.matrix() hid_to_hid.get_params.return_value = [] # Construct a CustomRecurrentLayer using these Mocks l_rec = lasagne.layers.CustomRecurrentLayer( InputLayer(in_shape), in_to_hid, hid_to_hid) # Call get_output with a kwarg, should be passd to in_to_hid and hid_to_hid helper.get_output(l_rec, foo='bar') # Retrieve the arguments used to call in_to_hid.get_output_for args, kwargs = in_to_hid.get_output_for.call_args # Should be one argument - the Theano expression assert len(args) == 1 # One keywould argument - should be 'foo' -> 'bar' assert kwargs == {'foo': 'bar'} # Same as with in_to_hid args, kwargs = hid_to_hid.get_output_for.call_args assert len(args) == 1 assert kwargs == {'foo': 'bar'} Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_embedding.py0000644000175000017500000000317313307306052030465 0ustar sinclairssinclairsimport numpy import pytest import theano def test_embedding_2D_input(): import numpy as np import theano import theano.tensor as T from lasagne.layers import EmbeddingLayer, InputLayer, helper x = T.imatrix() batch_size = 2 seq_len = 3 emb_size = 5 vocab_size = 3 l_in = InputLayer((None, seq_len)) W = np.arange( vocab_size*emb_size).reshape((vocab_size, emb_size)).astype('float32') l1 = EmbeddingLayer(l_in, input_size=vocab_size, output_size=emb_size, W=W) x_test = np.array([[0, 1, 2], [0, 0, 2]], dtype='int32') # check output shape assert helper.get_output_shape( l1, (batch_size, seq_len)) == (batch_size, seq_len, emb_size) output = helper.get_output(l1, x) f = theano.function([x], output) np.testing.assert_array_almost_equal(f(x_test), W[x_test]) def test_embedding_1D_input(): import numpy as np import theano import theano.tensor as T from lasagne.layers import EmbeddingLayer, InputLayer, helper x = T.ivector() batch_size = 2 emb_size = 10 vocab_size = 3 l_in = InputLayer((None,)) W = np.arange( vocab_size*emb_size).reshape((vocab_size, emb_size)).astype('float32') l1 = EmbeddingLayer(l_in, input_size=vocab_size, output_size=emb_size, W=W) x_test = np.array([0, 1, 2], dtype='int32') # check output shape assert helper.get_output_shape( l1, (batch_size, )) == (batch_size, emb_size) output = helper.get_output(l1, x) f = theano.function([x], output) np.testing.assert_array_almost_equal(f(x_test), W[x_test]) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_local.py0000644000175000017500000001636113307306052027644 0ustar sinclairssinclairsimport numpy as np import pytest import theano from lasagne.utils import floatX def locally_connected2d(input, W, 
flip_filters=True): """ 2D convolution with unshared weights, no stride, 'same' padding, no dilation and no bias """ num_batch, input_channels, input_rows, input_cols = input.shape assert W.shape[1] == input_channels num_filters, input_channels, \ filter_rows, filter_cols, output_rows, output_cols = W.shape assert filter_rows % 2 == 1 assert filter_cols % 2 == 1 output = np.zeros((num_batch, num_filters, output_rows, output_cols)) for b in range(num_batch): for f in range(num_filters): for c in range(input_channels): for i_out in range(output_rows): for j_out in range(output_cols): for i_filter in range(filter_rows): i_in = i_out + i_filter - (filter_rows // 2) if not (0 <= i_in < input_rows): continue for j_filter in range(filter_cols): j_in = j_out + j_filter - (filter_cols // 2) if not (0 <= j_in < input_cols): continue if flip_filters: inc = (input[b, c, i_in, j_in] * W[f, c, -i_filter-1, -j_filter-1, i_out, j_out]) else: inc = (input[b, c, i_in, j_in] * W[f, c, i_filter, j_filter, i_out, j_out]) output[b, f, i_out, j_out] += inc return output def channelwise_locally_connected2d(input, W, flip_filters=True): """ channelwise 2D convolution with unshared weights, no stride, 'same' padding, no dilation and no bias """ num_batch, input_channels, input_rows, input_cols = input.shape num_filters, filter_rows, filter_cols, output_rows, output_cols = W.shape assert input_channels == num_filters assert filter_rows % 2 == 1 assert filter_cols % 2 == 1 output = np.zeros((num_batch, num_filters, output_rows, output_cols)) for b in range(num_batch): for f in range(num_filters): for i_out in range(output_rows): for j_out in range(output_cols): for i_filter in range(filter_rows): i_in = i_out + i_filter - (filter_rows // 2) if not (0 <= i_in < input_rows): continue for j_filter in range(filter_cols): j_in = j_out + j_filter - (filter_cols // 2) if not (0 <= j_in < input_cols): continue if flip_filters: inc = (input[b, f, i_in, j_in] * W[f, -i_filter-1, -j_filter-1, i_out, j_out]) else: inc = (input[b, f, i_in, j_in] * W[f, i_filter, j_filter, i_out, j_out]) output[b, f, i_out, j_out] += inc return output def locally_connected2d_test_sets(): def _convert(input, W, output, kwargs): return [floatX(input), floatX(W), output, kwargs] for batch_size in (2, 3): for input_shape in ((batch_size, 2, 5, 5), (batch_size, 4, 8, 8)): for num_filters in (2, 4): for filter_size in ((3, 3), (3, 5)): for flip_filters in (True, False): for channelwise in (True, False): if channelwise and num_filters != input_shape[1]: continue input = np.random.random(input_shape) if channelwise: W = np.random.random( (num_filters,) + filter_size + input_shape[2:]) output = channelwise_locally_connected2d( input, W, flip_filters=flip_filters) else: W = np.random.random( (num_filters, input_shape[1]) + filter_size + input_shape[2:]) output = locally_connected2d( input, W, flip_filters=flip_filters) yield _convert(input, W, output, {'num_filters': num_filters, 'filter_size': filter_size, 'flip_filters': flip_filters, 'channelwise': channelwise}) @pytest.fixture def DummyInputLayer(): def factory(shape): from lasagne.layers.input import InputLayer return InputLayer(shape) return factory class TestLocallyConnected2DLayer: @pytest.mark.parametrize( "input, W, output, kwargs", list(locally_connected2d_test_sets())) def test_defaults(self, DummyInputLayer, input, W, output, kwargs): from lasagne.layers import LocallyConnected2DLayer b, c, h, w = input.shape input_layer = DummyInputLayer((b, c, h, w)) layer = LocallyConnected2DLayer( 
input_layer, W=W, **kwargs) actual = layer.get_output_for(theano.shared(input)).eval() assert actual.shape == output.shape assert actual.shape == layer.output_shape assert np.allclose(actual, output) def test_unsupported_settings(self, DummyInputLayer): from lasagne.layers import LocallyConnected2DLayer input_layer = DummyInputLayer((10, 2, 4, 4)) for pad in 'valid', 'full', 1: with pytest.raises(NotImplementedError) as exc: LocallyConnected2DLayer(input_layer, 2, 3, pad=pad) assert "requires pad='same'" in exc.value.args[0] with pytest.raises(NotImplementedError) as exc: LocallyConnected2DLayer(input_layer, 2, 3, stride=2) assert "requires stride=1 / (1, 1)" in exc.value.args[0] def test_invalid_settings(self, DummyInputLayer): from lasagne.layers import LocallyConnected2DLayer input_layer = DummyInputLayer((10, 2, 4, 4)) with pytest.raises(ValueError) as exc: LocallyConnected2DLayer(input_layer, 4, 3, channelwise=True) assert "num_filters and the number of input channels should match" \ in exc.value.args[0] input_layer = DummyInputLayer((10, 2, None, 4)) with pytest.raises(ValueError) as exc: LocallyConnected2DLayer(input_layer, 4, 3, channelwise=True) assert "A LocallyConnected2DLayer requires a fixed input shape " \ "(except for the batch size)" in exc.value.args[0] Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_merge.py0000644000175000017500000002461313307306052027650 0ustar sinclairssinclairsfrom mock import Mock import numpy import pytest import theano class TestAutocrop: # Test internal helper methods of MergeCropLayer def test_autocrop_array_shapes(self): from lasagne.layers.merge import autocrop_array_shapes crop0 = None crop1 = [None, 'lower', 'center', 'upper'] # Too few crop modes; should get padded with None crop2 = ['lower', 'upper'] # Invalid crop modes crop_bad = ['lower', 'upper', 'bad', 'worse'] assert autocrop_array_shapes( [(1, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)], crop0) == \ [(1, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)] assert autocrop_array_shapes( [(1, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)], crop1) == \ [(1, 2, 3, 2), (5, 2, 3, 2), (5, 2, 3, 2)] assert autocrop_array_shapes( [(1, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)], crop2) == \ [(1, 2, 3, 4), (1, 2, 7, 8), (1, 2, 3, 2)] assert autocrop_array_shapes( [(None, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)], crop2) == \ [(None, 2, 3, 4), (None, 2, 7, 8), (None, 2, 3, 2)] with pytest.raises(ValueError): autocrop_array_shapes( [(1, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)], crop_bad) # Inconsistent dimensionality with pytest.raises(ValueError): autocrop_array_shapes( [(1, 2, 3, 4), (5, 6, 7), (5, 4, 3, 2, 10)], crop1) def test_crop_inputs(self): from lasagne.layers.merge import autocrop from numpy.testing import assert_array_equal crop_0 = None crop_1 = [None, 'lower', 'center', 'upper'] crop_l = ['lower', 'lower', 'lower', 'lower'] crop_c = ['center', 'center', 'center', 'center'] crop_u = ['upper', 'upper', 'upper', 'upper'] crop_x = ['lower', 'lower'] crop_bad = ['lower', 'lower', 'bad', 'worse'] x0 = numpy.random.random((2, 3, 5, 7)) x1 = numpy.random.random((1, 2, 3, 4)) x2 = numpy.random.random((6, 3, 4, 2)) def crop_test(cropping, inputs, expected): inputs = [theano.shared(x) for x in inputs] outs = autocrop(inputs, cropping) outs = [o.eval() for o in outs] assert len(outs) == len(expected) for o, e in zip(outs, expected): assert_array_equal(o, e) crop_test(crop_0, [x0, x1], [x0, x1]) crop_test(crop_1, [x0, x1], [x0[:, :2, 1:4, 3:], x1[:, :, :, :]]) crop_test(crop_l, [x0, x1], [x0[:1, :2, :3, :4], 
x1[:, :, :, :]]) crop_test(crop_c, [x0, x1], [x0[:1, :2, 1:4, 1:5], x1[:, :, :, :]]) crop_test(crop_u, [x0, x1], [x0[1:, 1:, 2:, 3:], x1[:, :, :, :]]) crop_test(crop_0, [x0, x2], [x0, x2]) crop_test(crop_1, [x0, x2], [x0[:, :, :4, 5:], x2[:, :, :, :]]) crop_test(crop_l, [x0, x2], [x0[:, :, :4, :2], x2[:2, :, :, :]]) crop_test(crop_c, [x0, x2], [x0[:, :, :4, 2:4], x2[2:4, :, :, :]]) crop_test(crop_u, [x0, x2], [x0[:, :, 1:, 5:], x2[4:, :, :, :]]) crop_test(crop_0, [x0, x1, x2], [x0, x1, x2]) crop_test(crop_1, [x0, x1, x2], [x0[:, :2, 1:4, 5:], x1[:, :, :, 2:], x2[:, :2, :3, :]]) crop_test(crop_l, [x0, x1, x2], [x0[:1, :2, :3, :2], x1[:, :, :, :2], x2[:1, :2, :3, :]]) crop_test(crop_c, [x0, x1, x2], [x0[:1, :2, 1:4, 2:4], x1[:, :, :, 1:3], x2[2:3, :2, :3, :]]) crop_test(crop_u, [x0, x1, x2], [x0[1:, 1:, 2:, 5:], x1[:, :, :, 2:], x2[5:, 1:, 1:, :]]) crop_test(crop_x, [x0, x1, x2], [x0[:1, :2, :, :], x1[:1, :2, :, :], x2[:1, :2, :, :]]) # test that num outputs is correct when the number of inputs is # larger than ndim of the inputs. crop_test(crop_x, [x0, x1, x2, x0, x1, x2], [x0[:1, :2, :, :], x1[:1, :2, :, :], x2[:1, :2, :, :], x0[:1, :2, :, :], x1[:1, :2, :, :], x2[:1, :2, :, :]]) with pytest.raises(ValueError): crop_test(crop_bad, [x0, x1, x2], [x0[:1, :2, :, :], x1[:1, :2, :, :], x2[:1, :2, :, :]]) # Inconsistent dimensionality with pytest.raises(ValueError): crop_test(crop_bad, [x0[:, :, :, 0], x1, x2[:, :, :, :, None]], [x0[:1, :2, :, :], x1[:1, :2, :, :], x2[:1, :2, :, :]]) class TestConcatLayer: def layer(self, axis): from lasagne.layers.merge import ConcatLayer return ConcatLayer([Mock(), Mock()], axis=axis) @pytest.fixture def crop_layer_0(self): from lasagne.layers.merge import ConcatLayer return ConcatLayer([Mock(), Mock()], axis=0, cropping=['lower'] * 2) @pytest.fixture def crop_layer_1(self): from lasagne.layers.merge import ConcatLayer return ConcatLayer([Mock(), Mock()], axis=1, cropping=['lower'] * 2) @pytest.mark.parametrize("axis", (1, -1)) def test_get_output_shape_for(self, axis): layer = self.layer(axis) assert layer.get_output_shape_for([(3, 2), (3, 5)]) == (3, 7) assert layer.get_output_shape_for([(3, 2), (3, None)]) == (3, None) assert layer.get_output_shape_for([(None, 2), (3, 5)]) == (3, 7) assert layer.get_output_shape_for([(None, 2), (None, 5)]) == (None, 7) with pytest.raises(ValueError): layer.get_output_shape_for([(4, None), (3, 5)]) with pytest.raises(ValueError): layer.get_output_shape_for([(3, 2), (4, None)]) with pytest.raises(ValueError): layer.get_output_shape_for([(None, 2), (3, 5), (4, 5)]) def test_get_output_shape_for_cropped(self, crop_layer_0, crop_layer_1): input_shapes = [(3, 2), (4, 5)] result_0 = crop_layer_0.get_output_shape_for(input_shapes) result_1 = crop_layer_1.get_output_shape_for(input_shapes) assert result_0 == (7, 2) assert result_1 == (3, 7) @pytest.mark.parametrize("axis", (1, -1)) def test_get_output_for(self, axis): layer = self.layer(axis) inputs = [theano.shared(numpy.ones((3, 3))), theano.shared(numpy.ones((3, 2)))] result = layer.get_output_for(inputs) result_eval = result.eval() desired_result = numpy.hstack([input.get_value() for input in inputs]) assert (result_eval == desired_result).all() def test_get_output_for_cropped(self, crop_layer_0, crop_layer_1): x0 = numpy.random.random((5, 3)) x1 = numpy.random.random((4, 2)) inputs = [theano.shared(x0), theano.shared(x1)] result_0 = crop_layer_0.get_output_for(inputs).eval() result_1 = crop_layer_1.get_output_for(inputs).eval() desired_result_0 = numpy.concatenate([x0[:, :2], 
x1[:, :2]], axis=0) desired_result_1 = numpy.concatenate([x0[:4, :], x1[:4, :]], axis=1) assert (result_0 == desired_result_0).all() assert (result_1 == desired_result_1).all() class TestElemwiseSumLayer: @pytest.fixture def layer(self): from lasagne.layers.merge import ElemwiseSumLayer return ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, -1]) @pytest.fixture def crop_layer(self): from lasagne.layers.merge import ElemwiseSumLayer return ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, -1], cropping=['lower'] * 2) def test_get_output_shape_for(self, layer): assert layer.get_output_shape_for([(3, 2), (3, 2)]) == (3, 2) assert layer.get_output_shape_for([(3, 2), (3, None)]) == (3, 2) assert layer.get_output_shape_for([(None, 2), (3, 2)]) == (3, 2) assert layer.get_output_shape_for([(None, 2), (None, 2)]) == (None, 2) with pytest.raises(ValueError): layer.get_output_shape_for([(3, None), (4, 2)]) with pytest.raises(ValueError): layer.get_output_shape_for([(3, 2), (4, None)]) with pytest.raises(ValueError): layer.get_output_shape_for([(None, 2), (3, 2), (4, 2)]) def test_get_output_for(self, layer): a = numpy.array([[0, 1], [2, 3]]) b = numpy.array([[1, 2], [4, 5]]) inputs = [theano.shared(a), theano.shared(b)] result = layer.get_output_for(inputs) result_eval = result.eval() desired_result = 2*a - b assert (result_eval == desired_result).all() def test_get_output_for_cropped(self, crop_layer): from numpy.testing import assert_array_almost_equal as aeq x0 = numpy.random.random((5, 3)) x1 = numpy.random.random((4, 2)) inputs = [theano.shared(x0), theano.shared(x1)] result = crop_layer.get_output_for(inputs).eval() desired_result = 2*x0[:4, :2] - x1[:4, :2] aeq(result, desired_result) def test_bad_coeffs_fails(self, layer): from lasagne.layers.merge import ElemwiseSumLayer with pytest.raises(ValueError): ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, 3, -1]) class TestElemwiseMergeLayerMul: @pytest.fixture def layer(self): import theano.tensor as T from lasagne.layers.merge import ElemwiseMergeLayer return ElemwiseMergeLayer([Mock(), Mock()], merge_function=T.mul) def test_get_output_for(self, layer): a = numpy.array([[0, 1], [2, 3]]) b = numpy.array([[1, 2], [4, 5]]) inputs = [theano.shared(a), theano.shared(b)] result = layer.get_output_for(inputs) result_eval = result.eval() desired_result = a*b assert (result_eval == desired_result).all() class TestElemwiseMergeLayerMaximum: @pytest.fixture def layer(self): import theano.tensor as T from lasagne.layers.merge import ElemwiseMergeLayer return ElemwiseMergeLayer([Mock(), Mock()], merge_function=T.maximum) def test_get_output_for(self, layer): a = numpy.array([[0, 1], [2, 3]]) b = numpy.array([[1, 2], [4, 5]]) inputs = [theano.shared(a), theano.shared(b)] result = layer.get_output_for(inputs) result_eval = result.eval() desired_result = numpy.maximum(a, b) assert (result_eval == desired_result).all() Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/layers/test_input.py0000644000175000017500000000322713307306052027706 0ustar sinclairssinclairsimport pytest import theano class TestInputLayer: @pytest.fixture def layer(self): from lasagne.layers.input import InputLayer return InputLayer((3, 2)) def test_input_var(self, layer): assert layer.input_var.ndim == 2 def test_shape(self, layer): assert layer.shape == (3, 2) def test_shape_list(self, layer): from lasagne.layers.input import InputLayer assert InputLayer([3, 2]).shape == (3, 2) def test_input_var_bcast(self): from lasagne.layers.input import InputLayer assert InputLayer((3, 
2)).input_var.broadcastable == (False, False) assert InputLayer((1, 2)).input_var.broadcastable == (True, False) assert InputLayer((None, 1)).input_var.broadcastable == (False, True) def test_input_var_name(self, layer): assert layer.input_var.name == "input" def test_named_layer_input_var_name(self): from lasagne.layers.input import InputLayer layer = InputLayer((3, 2), name="foo") assert layer.input_var.name == "foo.input" def test_get_params(self, layer): assert layer.get_params() == [] def test_bad_shape_fails(self): from lasagne.layers.input import InputLayer input_var = theano.tensor.tensor4() with pytest.raises(ValueError): InputLayer((3, 2), input_var) def test_nonpositive_input_dims_raises_value_error(self): from lasagne.layers import InputLayer with pytest.raises(ValueError): InputLayer(shape=(None, -1, -1)) with pytest.raises(ValueError): InputLayer(shape=(None, 0, 0)) InputLayer(shape=(None, 1, 1)) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/tests/test_regularization.py0000644000175000017500000000624513307306052030312 0ustar sinclairssinclairsimport pytest import numpy as np import theano.tensor as T import lasagne from collections import OrderedDict from theano.scan_module.scan_utils import equal_computations from mock import Mock class TestRegularizationPenalties(object): def l1(self, x): return np.abs(x).sum() def l2(self, x): return (x**2).sum() @pytest.mark.parametrize('penalty', ['l1', 'l2']) def test_penalty(self, penalty): np_penalty = getattr(self, penalty) theano_penalty = getattr(lasagne.regularization, penalty) X = T.matrix() X0 = lasagne.utils.floatX(np.random.uniform(-3, 3, (10, 10))) theano_result = theano_penalty(X).eval({X: X0}) np_result = np_penalty(X0) assert np.allclose(theano_result, np_result) class TestRegularizationHelpers(object): @pytest.fixture def layers(self): l_1 = lasagne.layers.InputLayer((None, 10)) l_2 = lasagne.layers.DenseLayer(l_1, num_units=20) l_3 = lasagne.layers.DenseLayer(l_2, num_units=30) return l_1, l_2, l_3 def test_apply_penalty(self): from lasagne.regularization import apply_penalty, l2 A = T.vector() B = T.matrix() assert apply_penalty([], l2) == 0 assert equal_computations([apply_penalty(A, l2)], [l2(A)]) assert equal_computations([apply_penalty([A, B], l2)], [sum([l2(A), l2(B)])]) def test_regularize_layer_params_single_layer(self, layers): from lasagne.regularization import regularize_layer_params l_1, l_2, l_3 = layers penalty = Mock(return_value=0) loss = regularize_layer_params(l_2, penalty) assert penalty.call_count == 1 penalty.assert_any_call(l_2.W) def test_regularize_layer_params_multiple_layers(self, layers): from lasagne.regularization import regularize_layer_params l_1, l_2, l_3 = layers penalty = Mock(return_value=0) loss = regularize_layer_params([l_1, l_2, l_3], penalty) assert penalty.call_count == 2 penalty.assert_any_call(l_2.W) penalty.assert_any_call(l_3.W) def test_regularize_network_params(self, layers): from lasagne.regularization import regularize_network_params l_1, l_2, l_3 = layers penalty = Mock(return_value=0) loss = regularize_network_params(l_3, penalty) assert penalty.call_count == 2 penalty.assert_any_call(l_2.W) penalty.assert_any_call(l_3.W) def test_regularize_layer_params_weighted(self, layers): from lasagne.regularization import regularize_layer_params_weighted from lasagne.regularization import apply_penalty, l2 l_1, l_2, l_3 = layers layers = OrderedDict() layers[l_2] = 0.1 layers[l_3] = 0.5 loss = regularize_layer_params_weighted(layers, lasagne.regularization.l2) assert 
equal_computations([loss], [sum([0.1 * apply_penalty([l_2.W], l2), 0.5 * apply_penalty([l_3.W], l2)])]) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/0000755000175000017500000000000013307306052023770 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/normalization.py0000644000175000017500000006464513307306052027247 0ustar sinclairssinclairs# -*- coding: utf-8 -*- """ The :class:`LocalResponseNormalization2DLayer ` implementation contains code from `pylearn2 `_, which is covered by the following license: Copyright (c) 2011--2014, Université de Montréal All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ import theano import theano.tensor as T from .. import init from .. import nonlinearities from ..utils import int_types from .base import Layer __all__ = [ "LocalResponseNormalization2DLayer", "BatchNormLayer", "batch_norm", "StandardizationLayer", "instance_norm", "layer_norm", ] class LocalResponseNormalization2DLayer(Layer): """ Cross-channel Local Response Normalization for 2D feature maps. Aggregation is purely across channels, not within channels, and performed "pixelwise". If the value of the :math:`i` th channel is :math:`x_i`, the output is .. math:: x_i = \\frac{x_i}{ (k + ( \\alpha \\sum_j x_j^2 ))^\\beta } where the summation is performed over this position on :math:`n` neighboring channels. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. Must follow *BC01* layout, i.e., ``(batchsize, channels, rows, columns)``. alpha : float scalar coefficient, see equation above k : float scalar offset, see equation above beta : float scalar exponent, see equation above n : int number of adjacent channels to normalize over, must be odd Notes ----- This code is adapted from pylearn2. See the module docstring for license information. 
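    Examples
    --------
    A minimal usage sketch (the input and filter sizes below are purely
    illustrative), inserting the layer after a convolutional layer whose
    output follows the required *BC01* layout:

    >>> from lasagne.layers import InputLayer, Conv2DLayer
    >>> from lasagne.layers import LocalResponseNormalization2DLayer
    >>> l_in = InputLayer((None, 3, 32, 32))
    >>> l_conv = Conv2DLayer(l_in, num_filters=16, filter_size=3)
    >>> l_lrn = LocalResponseNormalization2DLayer(l_conv, alpha=1e-4, k=2,
    ...                                           beta=0.75, n=5)
    >>> l_lrn.output_shape == l_conv.output_shape
    True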
""" def __init__(self, incoming, alpha=1e-4, k=2, beta=0.75, n=5, **kwargs): super(LocalResponseNormalization2DLayer, self).__init__(incoming, **kwargs) self.alpha = alpha self.k = k self.beta = beta self.n = n if n % 2 == 0: raise NotImplementedError("Only works with odd n") def get_output_shape_for(self, input_shape): return input_shape def get_output_for(self, input, **kwargs): input_shape = self.input_shape if any(s is None for s in input_shape): input_shape = input.shape half_n = self.n // 2 input_sqr = T.sqr(input) b, ch, r, c = input_shape extra_channels = T.alloc(0., b, ch + 2*half_n, r, c) input_sqr = T.set_subtensor(extra_channels[:, half_n:half_n+ch, :, :], input_sqr) scale = self.k for i in range(self.n): scale += self.alpha * input_sqr[:, i:i+ch, :, :] scale = scale ** self.beta return input / scale class BatchNormLayer(Layer): """ lasagne.layers.BatchNormLayer(incoming, axes='auto', epsilon=1e-4, alpha=0.1, beta=lasagne.init.Constant(0), gamma=lasagne.init.Constant(1), mean=lasagne.init.Constant(0), inv_std=lasagne.init.Constant(1), **kwargs) Batch Normalization This layer implements batch normalization of its inputs, following [1]_: .. math:: y = \\frac{x - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}} \\gamma + \\beta That is, the input is normalized to zero mean and unit variance, and then linearly transformed. The crucial part is that the mean and variance are computed across the batch dimension, i.e., over examples, not per example. During training, :math:`\\mu` and :math:`\\sigma^2` are defined to be the mean and variance of the current input mini-batch :math:`x`, and during testing, they are replaced with average statistics over the training data. Consequently, this layer has four stored parameters: :math:`\\beta`, :math:`\\gamma`, and the averages :math:`\\mu` and :math:`\\sigma^2` (nota bene: instead of :math:`\\sigma^2`, the layer actually stores :math:`1 / \\sqrt{\\sigma^2 + \\epsilon}`, for compatibility to cuDNN). By default, this layer learns the average statistics as exponential moving averages computed during training, so it can be plugged into an existing network without any changes of the training procedure (see Notes). Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape axes : 'auto', int or tuple of int The axis or axes to normalize over. If ``'auto'`` (the default), normalize over all axes except for the second: this will normalize over the minibatch dimension for dense layers, and additionally over all spatial dimensions for convolutional layers. epsilon : scalar Small constant :math:`\\epsilon` added to the variance before taking the square root and dividing by it, to avoid numerical problems alpha : scalar Coefficient for the exponential moving average of batch-wise means and standard deviations computed during training; the closer to one, the more it will depend on the last batches seen beta : Theano shared variable, expression, numpy array, callable or None Initial value, expression or initializer for :math:`\\beta`. Must match the incoming shape, skipping all axes in `axes`. Set to ``None`` to fix it to 0.0 instead of learning it. See :func:`lasagne.utils.create_param` for more information. gamma : Theano shared variable, expression, numpy array, callable or None Initial value, expression or initializer for :math:`\\gamma`. Must match the incoming shape, skipping all axes in `axes`. Set to ``None`` to fix it to 1.0 instead of learning it. 
See :func:`lasagne.utils.create_param` for more information. mean : Theano shared variable, expression, numpy array, or callable Initial value, expression or initializer for :math:`\\mu`. Must match the incoming shape, skipping all axes in `axes`. See :func:`lasagne.utils.create_param` for more information. inv_std : Theano shared variable, expression, numpy array, or callable Initial value, expression or initializer for :math:`1 / \\sqrt{ \\sigma^2 + \\epsilon}`. Must match the incoming shape, skipping all axes in `axes`. See :func:`lasagne.utils.create_param` for more information. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. Notes ----- This layer should be inserted between a linear transformation (such as a :class:`DenseLayer`, or :class:`Conv2DLayer`) and its nonlinearity. The convenience function :func:`batch_norm` modifies an existing layer to insert batch normalization in front of its nonlinearity. The behavior can be controlled by passing keyword arguments to :func:`lasagne.layers.get_output()` when building the output expression of any network containing this layer. During training, [1]_ normalize each input mini-batch by its statistics and update an exponential moving average of the statistics to be used for validation. This can be achieved by passing ``deterministic=False``. For validation, [1]_ normalize each input mini-batch by the stored statistics. This can be achieved by passing ``deterministic=True``. For more fine-grained control, ``batch_norm_update_averages`` can be passed to update the exponential moving averages (``True``) or not (``False``), and ``batch_norm_use_averages`` can be passed to use the exponential moving averages for normalization (``True``) or normalize each mini-batch by its own statistics (``False``). These settings override ``deterministic``. Note that for testing a model after training, [1]_ replace the stored exponential moving average statistics by fixing all network weights and re-computing average statistics over the training data in a layerwise fashion. This is not part of the layer implementation. In case you set `axes` to not include the batch dimension (the first axis, usually), normalization is done per example, not across examples. This does not require any averages, so you can pass ``batch_norm_update_averages`` and ``batch_norm_use_averages`` as ``False`` in this case. See also -------- batch_norm : Convenience function to apply batch normalization to a layer References ---------- .. [1] Ioffe, Sergey and Szegedy, Christian (2015): Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. http://arxiv.org/abs/1502.03167. 
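    Examples
    --------
    A minimal sketch of wiring the layer in by hand (the layer sizes are
    illustrative); in practice, the :func:`batch_norm` convenience function
    below performs this rewiring for you:

    >>> from lasagne.layers import InputLayer, DenseLayer, BatchNormLayer
    >>> from lasagne.layers import NonlinearityLayer
    >>> from lasagne.nonlinearities import identity, rectify
    >>> l_in = InputLayer((None, 100))
    >>> l_lin = DenseLayer(l_in, num_units=50, b=None, nonlinearity=identity)
    >>> l_bn = BatchNormLayer(l_lin)
    >>> l_out = NonlinearityLayer(l_bn, rectify)
    >>> l_bn.output_shape
    (None, 50)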
""" def __init__(self, incoming, axes='auto', epsilon=1e-4, alpha=0.1, beta=init.Constant(0), gamma=init.Constant(1), mean=init.Constant(0), inv_std=init.Constant(1), **kwargs): super(BatchNormLayer, self).__init__(incoming, **kwargs) if axes == 'auto': # default: normalize over all but the second axis axes = (0,) + tuple(range(2, len(self.input_shape))) elif isinstance(axes, int_types): axes = (axes,) self.axes = axes self.epsilon = epsilon self.alpha = alpha # create parameters, ignoring all dimensions in axes shape = [size for axis, size in enumerate(self.input_shape) if axis not in self.axes] if any(size is None for size in shape): raise ValueError("BatchNormLayer needs specified input sizes for " "all axes not normalized over.") if beta is None: self.beta = None else: self.beta = self.add_param(beta, shape, 'beta', trainable=True, regularizable=False) if gamma is None: self.gamma = None else: self.gamma = self.add_param(gamma, shape, 'gamma', trainable=True, regularizable=True) self.mean = self.add_param(mean, shape, 'mean', trainable=False, regularizable=False, batch_norm_stat=True) self.inv_std = self.add_param(inv_std, shape, 'inv_std', trainable=False, regularizable=False, batch_norm_stat=True) def get_output_for(self, input, deterministic=False, batch_norm_use_averages=None, batch_norm_update_averages=None, **kwargs): input_mean = input.mean(self.axes) input_inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon)) # Decide whether to use the stored averages or mini-batch statistics if batch_norm_use_averages is None: batch_norm_use_averages = deterministic use_averages = batch_norm_use_averages if use_averages: mean = self.mean inv_std = self.inv_std else: mean = input_mean inv_std = input_inv_std # Decide whether to update the stored averages if batch_norm_update_averages is None: batch_norm_update_averages = not deterministic update_averages = batch_norm_update_averages if update_averages: # Trick: To update the stored statistics, we create memory-aliased # clones of the stored statistics: running_mean = theano.clone(self.mean, share_inputs=False) running_inv_std = theano.clone(self.inv_std, share_inputs=False) # set a default update for them: running_mean.default_update = ((1 - self.alpha) * running_mean + self.alpha * input_mean) running_inv_std.default_update = ((1 - self.alpha) * running_inv_std + self.alpha * input_inv_std) # and make sure they end up in the graph without participating in # the computation (this way their default_update will be collected # and applied, but the computation will be optimized away): mean += 0 * running_mean inv_std += 0 * running_inv_std # prepare dimshuffle pattern inserting broadcastable axes as needed param_axes = iter(range(input.ndim - len(self.axes))) pattern = ['x' if input_axis in self.axes else next(param_axes) for input_axis in range(input.ndim)] # apply dimshuffle pattern to all parameters beta = 0 if self.beta is None else self.beta.dimshuffle(pattern) gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern) mean = mean.dimshuffle(pattern) inv_std = inv_std.dimshuffle(pattern) # normalize normalized = (input - mean) * (gamma * inv_std) + beta return normalized def batch_norm(layer, **kwargs): """ Apply batch normalization to an existing layer. 
This is a convenience function modifying an existing layer to include batch normalization: It will steal the layer's nonlinearity if there is one (effectively introducing the normalization right before the nonlinearity), remove the layer's bias if there is one (because it would be redundant), and add a :class:`BatchNormLayer` and :class:`NonlinearityLayer` on top. Parameters ---------- layer : A :class:`Layer` instance The layer to apply the normalization to; note that it will be irreversibly modified as specified above **kwargs Any additional keyword arguments are passed on to the :class:`BatchNormLayer` constructor. Returns ------- BatchNormLayer or NonlinearityLayer instance A batch normalization layer stacked on the given modified `layer`, or a nonlinearity layer stacked on top of both if `layer` was nonlinear. Examples -------- Just wrap any layer into a :func:`batch_norm` call on creating it: >>> from lasagne.layers import InputLayer, DenseLayer, batch_norm >>> from lasagne.nonlinearities import tanh >>> l1 = InputLayer((64, 768)) >>> l2 = batch_norm(DenseLayer(l1, num_units=500, nonlinearity=tanh)) This introduces batch normalization right before its nonlinearity: >>> from lasagne.layers import get_all_layers >>> [l.__class__.__name__ for l in get_all_layers(l2)] ['InputLayer', 'DenseLayer', 'BatchNormLayer', 'NonlinearityLayer'] """ nonlinearity = getattr(layer, 'nonlinearity', None) if nonlinearity is not None: layer.nonlinearity = nonlinearities.identity if hasattr(layer, 'b') and layer.b is not None: del layer.params[layer.b] layer.b = None bn_name = (kwargs.pop('name', None) or (getattr(layer, 'name', None) and layer.name + '_bn')) layer = BatchNormLayer(layer, name=bn_name, **kwargs) if nonlinearity is not None: from .special import NonlinearityLayer nonlin_name = bn_name and bn_name + '_nonlin' layer = NonlinearityLayer(layer, nonlinearity, name=nonlin_name) return layer class StandardizationLayer(Layer): """ Standardize inputs to zero mean and unit variance: .. math:: y_i = \\frac{x_i - \\mu_i}{\\sqrt{\\sigma_i^2 + \\epsilon}} The mean :math:`\\mu_i` and variance :math:`\\sigma_i^2` are computed and shared across a given set of axes. In contrast to batch normalization, these axes usually do not include the batch dimension, so each example is normalized independently from other examples in the minibatch, both during training and testing. The :class:`StandardizationLayer` can be employed to realize instance normalization [1]_ and layer normalization [2]_, for both of which convenience functions (:func:`instance_norm` and :func:`layer_norm`) are available. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape axes : 'auto', 'spatial', 'features', int or tuple of int The axis or axes to normalize over. If ``'auto'`` (the default), two-dimensional inputs are normalized over the last dimension (i.e., this will normalize over units for dense layers), input tensors with more than two dimensions are normalized over all but the first two dimensions (i.e., this will normalize over all spatial dimensions for convolutional layers). If ``'spatial'``, will normalize over all but the first two dimensions. If ``'features'``, will normalize over all but the first dimension. epsilon : scalar Small constant :math:`\\epsilon` added to the variance before taking the square root and dividing by it, to avoid numerical problems **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. 
See also -------- instance_norm : Convenience function to apply instance normalization layer_norm : Convenience function to apply layer normalization to a layer References ---------- .. [1] Ulyanov, D., Vedaldi, A., & Lempitsky, V. (2016): Instance Normalization: The Missing Ingredient for Fast Stylization. https://arxiv.org/abs/1607.08022. .. [2] Ba, J., Kiros, J., & Hinton, G. (2016): Layer normalization. https://arxiv.org/abs/1607.06450. """ def __init__(self, incoming, axes='auto', epsilon=1e-4, **kwargs): super(StandardizationLayer, self).__init__(incoming, **kwargs) if axes == 'auto': # default: normalize across 2nd dimension for 2D inputs # and across all but the first two axes for 3D+ inputs if len(self.input_shape) == 2: axes = (1,) else: axes = tuple(range(2, len(self.input_shape))) elif axes == 'spatial': # normalize over spatial dimensions only, # separate for each instance in the batch axes = tuple(range(2, len(self.input_shape))) elif axes == 'features': # normalize over features and spatial dimensions, # separate for each instance in the batch axes = tuple(range(1, len(self.input_shape))) elif isinstance(axes, int): axes = (axes,) self.axes = axes self.epsilon = epsilon def get_output_for(self, input, **kwargs): mean = input.mean(self.axes, keepdims=True) std = T.sqrt(input.var(self.axes, keepdims=True) + self.epsilon) return (input - mean) / std def instance_norm(layer, learn_scale=True, learn_bias=True, **kwargs): """ Apply instance normalization to an existing layer. This is a convenience function modifying an existing layer to include instance normalization: It will steal the layer's nonlinearity if there is one (effectively introducing the normalization right before the nonlinearity), remove the layer's bias if there is one (because it would be effectless), and add a :class:`StandardizationLayer` and :class:`NonlinearityLayer` on top. Depending on the given arguments, an additional :class:`ScaleLayer` and :class:`BiasLayer` will be inserted in between. In effect, it will separately standardize each feature map of each input example, followed by an optional scale and shift learned per channel, followed by the original nonlinearity, as proposed in [1]_. Parameters ---------- layer : A :class:`Layer` instance The layer to apply the normalization to; note that it will be irreversibly modified as specified above learn_scale : bool (default: True) Whether to add a ScaleLayer after the StandardizationLayer learn_bias : bool (default: True) Whether to add a BiasLayer after the StandardizationLayer (or the optional ScaleLayer) **kwargs Any additional keyword arguments are passed on to the :class:`StandardizationLayer` constructor. Returns ------- StandardizationLayer, ScaleLayer, BiasLayer, or NonlinearityLayer instance The last layer stacked on top of the given modified `layer` to implement instance normalization with optional scaling and shifting. Examples -------- Just wrap any layer into a :func:`instance_norm` call on creating it: >>> from lasagne.layers import InputLayer, Conv2DLayer, instance_norm >>> from lasagne.nonlinearities import rectify >>> l1 = InputLayer((10, 3, 28, 28)) >>> l2 = instance_norm(Conv2DLayer(l1, num_filters=64, filter_size=3, ... nonlinearity=rectify)) This introduces instance normalization right before its nonlinearity: >>> from lasagne.layers import get_all_layers >>> [l.__class__.__name__ for l in get_all_layers(l2)] ['InputLayer', 'Conv2DLayer', 'StandardizationLayer', \ 'ScaleLayer', 'BiasLayer', 'NonlinearityLayer'] References ---------- .. 
[1] Ulyanov, D., Vedaldi, A., & Lempitsky, V. (2016): Instance Normalization: The Missing Ingredient for Fast Stylization. https://arxiv.org/abs/1607.08022. """ nonlinearity = getattr(layer, 'nonlinearity', None) if nonlinearity is not None: layer.nonlinearity = nonlinearities.identity if hasattr(layer, 'b') and layer.b is not None: del layer.params[layer.b] layer.b = None in_name = (kwargs.pop('name', None) or (getattr(layer, 'name', None) and layer.name + '_in')) layer = StandardizationLayer(layer, axes='spatial', name=in_name, **kwargs) if learn_scale: from .special import ScaleLayer scale_name = in_name and in_name + '_scale' layer = ScaleLayer(layer, shared_axes='auto', name=scale_name) if learn_bias: from .special import BiasLayer bias_name = in_name and in_name + '_bias' layer = BiasLayer(layer, shared_axes='auto', name=bias_name) if nonlinearity is not None: from .special import NonlinearityLayer nonlin_name = in_name and in_name + '_nonlin' layer = NonlinearityLayer(layer, nonlinearity, name=nonlin_name) return layer def layer_norm(layer, **kwargs): """ Apply layer normalization to an existing layer. This is a convenience function modifying an existing layer to include layer normalization: It will steal the layer's nonlinearity if there is one (effectively introducing the normalization right before the nonlinearity), remove the layer's bias if there is one, and add a :class:`StandardizationLayer`, :class:`ScaleLayer`, :class:`BiasLayer`, and :class:`NonlinearityLayer` on top. In effect, it will standardize each input example across the feature and spatial dimensions (if any), followed by a scale and shift learned per feature, followed by the original nonlinearity, as proposed in [1]_. Parameters ---------- layer : A :class:`Layer` instance The layer to apply the normalization to; note that it will be irreversibly modified as specified above **kwargs Any additional keyword arguments are passed on to the :class:`StandardizationLayer` constructor. Returns ------- StandardizationLayer or NonlinearityLayer instance The last layer stacked on top of the given modified `layer` to implement layer normalization with feature-wise scaling and shifting. Examples -------- Just wrap any layer into a :func:`layer_norm` call on creating it: >>> from lasagne.layers import InputLayer, DenseLayer, layer_norm >>> from lasagne.nonlinearities import rectify >>> l1 = InputLayer((10, 28)) >>> l2 = layer_norm(DenseLayer(l1, num_units=64, nonlinearity=rectify)) This introduces layer normalization right before its nonlinearity: >>> from lasagne.layers import get_all_layers >>> [l.__class__.__name__ for l in get_all_layers(l2)] ['InputLayer', 'DenseLayer', 'StandardizationLayer', \ 'ScaleLayer', 'BiasLayer', 'NonlinearityLayer'] References ---------- .. [1] Ba, J., Kiros, J., & Hinton, G. (2016): Layer normalization. https://arxiv.org/abs/1607.06450. 
""" nonlinearity = getattr(layer, 'nonlinearity', None) if nonlinearity is not None: layer.nonlinearity = nonlinearities.identity ln_name = (kwargs.pop('name', None) or (getattr(layer, 'name', None) and layer.name + '_ln')) if hasattr(layer, 'b') and layer.b is not None: del layer.params[layer.b] layer.b = None layer = StandardizationLayer(layer, axes='features', name=ln_name, **kwargs) scale_name = ln_name and ln_name + '_scale' from .special import ScaleLayer layer = ScaleLayer(layer, shared_axes='auto', name=scale_name) from .special import BiasLayer bias_name = ln_name and ln_name + '_bias' layer = BiasLayer(layer, shared_axes='auto', name=bias_name) if nonlinearity is not None: from .special import NonlinearityLayer nonlin_name = ln_name and ln_name + '_nonlin' layer = NonlinearityLayer(layer, nonlinearity, name=nonlin_name) return layer Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/helper.py0000644000175000017500000005047413307306052025633 0ustar sinclairssinclairsfrom collections import deque from difflib import get_close_matches from itertools import chain from warnings import warn import theano import numpy as np from .. import utils __all__ = [ "get_all_layers", "get_output", "get_output_shape", "get_all_params", "count_params", "get_all_param_values", "set_all_param_values", ] def get_all_layers(layer, treat_as_input=None): """ This function gathers all layers below one or more given :class:`Layer` instances, including the given layer(s). Its main use is to collect all layers of a network just given the output layer(s). The layers are guaranteed to be returned in a topological order: a layer in the result list is always preceded by all layers its input depends on. Parameters ---------- layer : Layer or list the :class:`Layer` instance for which to gather all layers feeding into it, or a list of :class:`Layer` instances. treat_as_input : None or iterable an iterable of :class:`Layer` instances to treat as input layers with no layers feeding into them. They will show up in the result list, but their incoming layers will not be collected (unless they are required for other layers as well). Returns ------- list a list of :class:`Layer` instances feeding into the given instance(s) either directly or indirectly, and the given instance(s) themselves, in topological order. Examples -------- >>> from lasagne.layers import InputLayer, DenseLayer >>> l_in = InputLayer((100, 20)) >>> l1 = DenseLayer(l_in, num_units=50) >>> get_all_layers(l1) == [l_in, l1] True >>> l2 = DenseLayer(l_in, num_units=10) >>> get_all_layers([l2, l1]) == [l_in, l2, l1] True >>> get_all_layers([l1, l2]) == [l_in, l1, l2] True >>> l3 = DenseLayer(l2, num_units=20) >>> get_all_layers(l3) == [l_in, l2, l3] True >>> get_all_layers(l3, treat_as_input=[l2]) == [l2, l3] True """ # We perform a depth-first search. We add a layer to the result list only # after adding all its incoming layers (if any) or when detecting a cycle. # We use a LIFO stack to avoid ever running into recursion depth limits. try: queue = deque(layer) except TypeError: queue = deque([layer]) seen = set() done = set() result = [] # If treat_as_input is given, we pretend we've already collected all their # incoming layers. if treat_as_input is not None: seen.update(treat_as_input) while queue: # Peek at the leftmost node in the queue. layer = queue[0] if layer is None: # Some node had an input_layer set to `None`. Just ignore it. 
queue.popleft() elif layer not in seen: # We haven't seen this node yet: Mark it and queue all incomings # to be processed first. If there are no incomings, the node will # be appended to the result list in the next iteration. seen.add(layer) if hasattr(layer, 'input_layers'): queue.extendleft(reversed(layer.input_layers)) elif hasattr(layer, 'input_layer'): queue.appendleft(layer.input_layer) else: # We've been here before: Either we've finished all its incomings, # or we've detected a cycle. In both cases, we remove the layer # from the queue and append it to the result list. queue.popleft() if layer not in done: result.append(layer) done.add(layer) return result def get_output(layer_or_layers, inputs=None, **kwargs): """ Computes the output of the network at one or more given layers. Optionally, you can define the input(s) to propagate through the network instead of using the input variable(s) associated with the network's input layer(s). Parameters ---------- layer_or_layers : Layer or list the :class:`Layer` instance for which to compute the output expressions, or a list of :class:`Layer` instances. inputs : None, Theano expression, numpy array, or dict If None, uses the input variables associated with the :class:`InputLayer` instances. If a Theano expression, this defines the input for a single :class:`InputLayer` instance. Will throw a ValueError if there are multiple :class:`InputLayer` instances. If a numpy array, this will be wrapped as a Theano constant and used just like a Theano expression. If a dictionary, any :class:`Layer` instance (including the input layers) can be mapped to a Theano expression or numpy array to use instead of its regular output. Returns ------- output : Theano expression or list the output of the given layer(s) for the given network input Notes ----- Depending on your network architecture, `get_output([l1, l2])` may be crucially different from `[get_output(l1), get_output(l2)]`. Only the former ensures that the output expressions depend on the same intermediate expressions. For example, when `l1` and `l2` depend on a common dropout layer, the former will use the same dropout mask for both, while the latter will use two different dropout masks. """ from .input import InputLayer from .base import MergeLayer, Layer # check if the keys of the dictionary are valid if isinstance(inputs, dict): for input_key in inputs.keys(): if (input_key is not None) and (not isinstance(input_key, Layer)): raise TypeError("The inputs dictionary keys must be" " lasagne layers not %s." % type(input_key)) # track accepted kwargs used by get_output_for accepted_kwargs = {'deterministic'} # obtain topological ordering of all layers the output layer(s) depend on treat_as_input = inputs.keys() if isinstance(inputs, dict) else [] all_layers = get_all_layers(layer_or_layers, treat_as_input) # initialize layer-to-expression mapping from all input layers all_outputs = dict((layer, layer.input_var) for layer in all_layers if isinstance(layer, InputLayer) and layer not in treat_as_input) # update layer-to-expression mapping from given input(s), if any if isinstance(inputs, dict): all_outputs.update((layer, utils.as_theano_expression(expr)) for layer, expr in inputs.items()) elif inputs is not None: if len(all_outputs) > 1: raise ValueError("get_output() was called with a single input " "expression on a network with multiple input " "layers. 
Please call it with a dictionary of " "input expressions instead.") for input_layer in all_outputs: all_outputs[input_layer] = utils.as_theano_expression(inputs) # update layer-to-expression mapping by propagating the inputs for layer in all_layers: if layer not in all_outputs: try: if isinstance(layer, MergeLayer): layer_inputs = [all_outputs[input_layer] for input_layer in layer.input_layers] else: layer_inputs = all_outputs[layer.input_layer] except KeyError: # one of the input_layer attributes must have been `None` raise ValueError("get_output() was called without giving an " "input expression for the free-floating " "layer %r. Please call it with a dictionary " "mapping this layer to an input expression." % layer) all_outputs[layer] = layer.get_output_for(layer_inputs, **kwargs) try: accepted_kwargs |= set(utils.inspect_kwargs( layer.get_output_for)) except TypeError: # If introspection is not possible, skip it pass accepted_kwargs |= set(layer.get_output_kwargs) unused_kwargs = set(kwargs.keys()) - accepted_kwargs if unused_kwargs: suggestions = [] for kwarg in unused_kwargs: suggestion = get_close_matches(kwarg, accepted_kwargs) if suggestion: suggestions.append('%s (perhaps you meant %s)' % (kwarg, suggestion[0])) else: suggestions.append(kwarg) warn("get_output() was called with unused kwargs:\n\t%s" % "\n\t".join(suggestions)) # return the output(s) of the requested layer(s) only try: return [all_outputs[layer] for layer in layer_or_layers] except TypeError: return all_outputs[layer_or_layers] def get_output_shape(layer_or_layers, input_shapes=None): """ Computes the output shape of the network at one or more given layers. Parameters ---------- layer_or_layers : Layer or list the :class:`Layer` instance for which to compute the output shapes, or a list of :class:`Layer` instances. input_shapes : None, tuple, or dict If None, uses the input shapes associated with the :class:`InputLayer` instances. If a tuple, this defines the input shape for a single :class:`InputLayer` instance. Will throw a ValueError if there are multiple :class:`InputLayer` instances. If a dictionary, any :class:`Layer` instance (including the input layers) can be mapped to a shape tuple to use instead of its regular output shape. Returns ------- tuple or list the output shape of the given layer(s) for the given network input """ # shortcut: return precomputed shapes if we do not need to propagate any if input_shapes is None or input_shapes == {}: try: return [layer.output_shape for layer in layer_or_layers] except TypeError: return layer_or_layers.output_shape from .input import InputLayer from .base import MergeLayer # obtain topological ordering of all layers the output layer(s) depend on if isinstance(input_shapes, dict): treat_as_input = input_shapes.keys() else: treat_as_input = [] all_layers = get_all_layers(layer_or_layers, treat_as_input) # initialize layer-to-shape mapping from all input layers all_shapes = dict((layer, layer.shape) for layer in all_layers if isinstance(layer, InputLayer) and layer not in treat_as_input) # update layer-to-shape mapping from given input(s), if any if isinstance(input_shapes, dict): all_shapes.update(input_shapes) elif input_shapes is not None: if len(all_shapes) > 1: raise ValueError("get_output_shape() was called with a single " "input shape on a network with multiple input " "layers. 
Please call it with a dictionary of " "input shapes instead.") for input_layer in all_shapes: all_shapes[input_layer] = input_shapes # update layer-to-shape mapping by propagating the input shapes for layer in all_layers: if layer not in all_shapes: if isinstance(layer, MergeLayer): input_shapes = [all_shapes[input_layer] for input_layer in layer.input_layers] else: input_shapes = all_shapes[layer.input_layer] all_shapes[layer] = layer.get_output_shape_for(input_shapes) # return the output shape(s) of the requested layer(s) only try: return [all_shapes[layer] for layer in layer_or_layers] except TypeError: return all_shapes[layer_or_layers] def get_all_params(layer, unwrap_shared=True, **tags): """ Returns a list of Theano shared variables or expressions that parameterize the layer. This function gathers all parameters of all layers below one or more given :class:`Layer` instances, including the layer(s) itself. Its main use is to collect all parameters of a network just given the output layer(s). By default, all shared variables that participate in the forward pass will be returned. The list can optionally be filtered by specifying tags as keyword arguments. For example, ``trainable=True`` will only return trainable parameters, and ``regularizable=True`` will only return parameters that can be regularized (e.g., by L2 decay). Parameters ---------- layer : Layer or list The :class:`Layer` instance for which to gather all parameters, or a list of :class:`Layer` instances. unwrap_shared : bool (default: True) Affects only parameters that were set to a Theano expression. If ``True`` the function returns the shared variables contained in the expression, otherwise the Theano expression itself. **tags (optional) tags can be specified to filter the list. Specifying ``tag1=True`` will limit the list to parameters that are tagged with ``tag1``. Specifying ``tag1=False`` will limit the list to parameters that are not tagged with ``tag1``. Commonly used tags are ``regularizable`` and ``trainable``. Returns ------- params : list A list of Theano shared variables or expressions representing the parameters. Notes ----- If any of the layers' parameters was set to a Theano expression instead of a shared variable, `unwrap_shared` controls whether to return the shared variables involved in that expression (``unwrap_shared=True``, the default), or the expression itself (``unwrap_shared=False``). In either case, tag filtering applies to the expressions, considering all variables within an expression to be tagged the same. Examples -------- Collecting all parameters from a two-layer network: >>> from lasagne.layers import InputLayer, DenseLayer >>> l_in = InputLayer((100, 20)) >>> l1 = DenseLayer(l_in, num_units=50) >>> l2 = DenseLayer(l1, num_units=30) >>> all_params = get_all_params(l2) >>> all_params == [l1.W, l1.b, l2.W, l2.b] True Parameters can be filtered by tags, and parameter expressions are unwrapped to return involved shared variables by default: >>> from lasagne.utils import floatX >>> w1 = theano.shared(floatX(.01 * np.random.randn(50, 30))) >>> w2 = theano.shared(floatX(1)) >>> l2 = DenseLayer(l1, num_units=30, W=theano.tensor.exp(w1) - w2, b=None) >>> all_params = get_all_params(l2, regularizable=True) >>> all_params == [l1.W, w1, w2] True When disabling unwrapping, the expression for ``l2.W`` is returned instead: >>> all_params = get_all_params(l2, regularizable=True, ... 
unwrap_shared=False) >>> all_params == [l1.W, l2.W] True """ layers = get_all_layers(layer) params = chain.from_iterable(l.get_params( unwrap_shared=unwrap_shared, **tags) for l in layers) return utils.unique(params) def count_params(layer, **tags): """ This function counts all parameters (i.e., the number of scalar values) of all layers below one or more given :class:`Layer` instances, including the layer(s) itself. This is useful to compare the capacity of various network architectures. All parameters returned by the :class:`Layer`s' `get_params` methods are counted. Parameters ---------- layer : Layer or list The :class:`Layer` instance for which to count the parameters, or a list of :class:`Layer` instances. **tags (optional) tags can be specified to filter the list of parameter variables that will be included in the count. Specifying ``tag1=True`` will limit the list to parameters that are tagged with ``tag1``. Specifying ``tag1=False`` will limit the list to parameters that are not tagged with ``tag1``. Commonly used tags are ``regularizable`` and ``trainable``. Returns ------- int The total number of learnable parameters. Examples -------- >>> from lasagne.layers import InputLayer, DenseLayer >>> l_in = InputLayer((100, 20)) >>> l1 = DenseLayer(l_in, num_units=50) >>> param_count = count_params(l1) >>> param_count 1050 >>> param_count == 20 * 50 + 50 # 20 input * 50 units + 50 biases True """ params = get_all_params(layer, **tags) shapes = [p.get_value().shape for p in params] counts = [np.prod(shape) for shape in shapes] return sum(counts) def get_all_param_values(layer, **tags): """ This function returns the values of the parameters of all layers below one or more given :class:`Layer` instances, including the layer(s) itself. This function can be used in conjunction with set_all_param_values to save and restore model parameters. Parameters ---------- layer : Layer or list The :class:`Layer` instance for which to gather all parameter values, or a list of :class:`Layer` instances. **tags (optional) tags can be specified to filter the list. Specifying ``tag1=True`` will limit the list to parameters that are tagged with ``tag1``. Specifying ``tag1=False`` will limit the list to parameters that are not tagged with ``tag1``. Commonly used tags are ``regularizable`` and ``trainable``. Returns ------- list of numpy.array A list of numpy arrays representing the parameter values. Examples -------- >>> from lasagne.layers import InputLayer, DenseLayer >>> l_in = InputLayer((100, 20)) >>> l1 = DenseLayer(l_in, num_units=50) >>> all_param_values = get_all_param_values(l1) >>> (all_param_values[0] == l1.W.get_value()).all() True >>> (all_param_values[1] == l1.b.get_value()).all() True """ params = get_all_params(layer, **tags) return [p.get_value() for p in params] def set_all_param_values(layer, values, **tags): """ Given a list of numpy arrays, this function sets the parameters of all layers below one or more given :class:`Layer` instances (including the layer(s) itself) to the given values. This function can be used in conjunction with get_all_param_values to save and restore model parameters. Parameters ---------- layer : Layer or list The :class:`Layer` instance for which to set all parameter values, or a list of :class:`Layer` instances. values : list of numpy.array A list of numpy arrays representing the parameter values, must match the number of parameters. Every parameter's shape must match the shape of its new value. 
**tags (optional) tags can be specified to filter the list of parameters to be set. Specifying ``tag1=True`` will limit the list to parameters that are tagged with ``tag1``. Specifying ``tag1=False`` will limit the list to parameters that are not tagged with ``tag1``. Commonly used tags are ``regularizable`` and ``trainable``. Raises ------ ValueError If the number of values is not equal to the number of params, or if a parameter's shape does not match the shape of its new value. Examples -------- >>> from lasagne.layers import InputLayer, DenseLayer >>> l_in = InputLayer((100, 20)) >>> l1 = DenseLayer(l_in, num_units=50) >>> all_param_values = get_all_param_values(l1) >>> # all_param_values is now [l1.W.get_value(), l1.b.get_value()] >>> # ... >>> set_all_param_values(l1, all_param_values) >>> # the parameter values are restored. """ params = get_all_params(layer, **tags) if len(params) != len(values): raise ValueError("mismatch: got %d values to set %d parameters" % (len(values), len(params))) for p, v in zip(params, values): if p.get_value().shape != v.shape: raise ValueError("mismatch: parameter has shape %r but value to " "set has shape %r" % (p.get_value().shape, v.shape)) else: p.set_value(v) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/__init__.py0000644000175000017500000000046613307306052026107 0ustar sinclairssinclairsfrom .base import * from .helper import * from .input import * from .dense import * from .noise import * from .conv import * from .local import * from .pool import * from .shape import * from .merge import * from .normalization import * from .embedding import * from .recurrent import * from .special import * Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/special.py0000644000175000017500000013577213307306052026001 0ustar sinclairssinclairsimport theano import theano.tensor as T import numpy as np from .. import init from .. import nonlinearities from ..utils import as_tuple, floatX, int_types from ..random import get_rng from .base import Layer, MergeLayer from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams __all__ = [ "NonlinearityLayer", "BiasLayer", "ScaleLayer", "standardize", "ExpressionLayer", "InverseLayer", "TransformerLayer", "TPSTransformerLayer", "ParametricRectifierLayer", "prelu", "RandomizedRectifierLayer", "rrelu", ] class NonlinearityLayer(Layer): """ lasagne.layers.NonlinearityLayer(incoming, nonlinearity=lasagne.nonlinearities.rectify, **kwargs) A layer that just applies a nonlinearity. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. """ def __init__(self, incoming, nonlinearity=nonlinearities.rectify, **kwargs): super(NonlinearityLayer, self).__init__(incoming, **kwargs) self.nonlinearity = (nonlinearities.identity if nonlinearity is None else nonlinearity) def get_output_for(self, input, **kwargs): return self.nonlinearity(input) class BiasLayer(Layer): """ lasagne.layers.BiasLayer(incoming, b=lasagne.init.Constant(0), shared_axes='auto', **kwargs) A layer that just adds a (trainable) bias term. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. 
If set to ``None``, the layer will have no biases and pass through its input unchanged. Otherwise, the bias shape must match the incoming shape, skipping those axes the biases are shared over (see the example below). See :func:`lasagne.utils.create_param` for more information. shared_axes : 'auto', int or tuple of int The axis or axes to share biases over. If ``'auto'`` (the default), share over all axes except for the second: this will share biases over the minibatch dimension for dense layers, and additionally over all spatial dimensions for convolutional layers. Notes ----- The bias parameter dimensionality is the input dimensionality minus the number of axes the biases are shared over, which matches the bias parameter conventions of :class:`DenseLayer` or :class:`Conv2DLayer`. For example: >>> layer = BiasLayer((20, 30, 40, 50), shared_axes=(0, 2)) >>> layer.b.get_value().shape (30, 50) """ def __init__(self, incoming, b=init.Constant(0), shared_axes='auto', **kwargs): super(BiasLayer, self).__init__(incoming, **kwargs) if shared_axes == 'auto': # default: share biases over all but the second axis shared_axes = (0,) + tuple(range(2, len(self.input_shape))) elif isinstance(shared_axes, int_types): shared_axes = (shared_axes,) self.shared_axes = shared_axes if b is None: self.b = None else: # create bias parameter, ignoring all dimensions in shared_axes shape = [size for axis, size in enumerate(self.input_shape) if axis not in self.shared_axes] if any(size is None for size in shape): raise ValueError("BiasLayer needs specified input sizes for " "all axes that biases are not shared over.") self.b = self.add_param(b, shape, 'b', regularizable=False) def get_output_for(self, input, **kwargs): if self.b is not None: bias_axes = iter(range(self.b.ndim)) pattern = ['x' if input_axis in self.shared_axes else next(bias_axes) for input_axis in range(input.ndim)] return input + self.b.dimshuffle(*pattern) else: return input class ScaleLayer(Layer): """ lasagne.layers.ScaleLayer(incoming, scales=lasagne.init.Constant(1), shared_axes='auto', **kwargs) A layer that scales its inputs by learned coefficients. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape scales : Theano shared variable, expression, numpy array, or callable Initial value, expression or initializer for the scale. The scale shape must match the incoming shape, skipping those axes the scales are shared over (see the example below). See :func:`lasagne.utils.create_param` for more information. shared_axes : 'auto', int or tuple of int The axis or axes to share scales over. If ``'auto'`` (the default), share over all axes except for the second: this will share scales over the minibatch dimension for dense layers, and additionally over all spatial dimensions for convolutional layers. Notes ----- The scales parameter dimensionality is the input dimensionality minus the number of axes the scales are shared over, which matches the bias parameter conventions of :class:`DenseLayer` or :class:`Conv2DLayer`. 
For example: >>> layer = ScaleLayer((20, 30, 40, 50), shared_axes=(0, 2)) >>> layer.scales.get_value().shape (30, 50) """ def __init__(self, incoming, scales=init.Constant(1), shared_axes='auto', **kwargs): super(ScaleLayer, self).__init__(incoming, **kwargs) if shared_axes == 'auto': # default: share scales over all but the second axis shared_axes = (0,) + tuple(range(2, len(self.input_shape))) elif isinstance(shared_axes, int_types): shared_axes = (shared_axes,) self.shared_axes = shared_axes # create scales parameter, ignoring all dimensions in shared_axes shape = [size for axis, size in enumerate(self.input_shape) if axis not in self.shared_axes] if any(size is None for size in shape): raise ValueError("ScaleLayer needs specified input sizes for " "all axes that scales are not shared over.") self.scales = self.add_param( scales, shape, 'scales', regularizable=False) def get_output_for(self, input, **kwargs): axes = iter(range(self.scales.ndim)) pattern = ['x' if input_axis in self.shared_axes else next(axes) for input_axis in range(input.ndim)] return input * self.scales.dimshuffle(*pattern) def standardize(layer, offset, scale, shared_axes='auto'): """ Convenience function for standardizing inputs by applying a fixed offset and scale. This is usually useful when you want the input to your network to, say, have zero mean and unit standard deviation over the feature dimensions. This layer allows you to include the appropriate statistics to achieve this normalization as part of your network, and applies them to its input. The statistics are supplied as the `offset` and `scale` parameters, which are applied to the input by subtracting `offset` and dividing by `scale`, sharing dimensions as specified by the `shared_axes` argument. Parameters ---------- layer : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. offset : Theano shared variable or numpy array The offset to apply (via subtraction) to the axis/axes being standardized. scale : Theano shared variable or numpy array The scale to apply (via division) to the axis/axes being standardized. shared_axes : 'auto', int or tuple of int The axis or axes to share the offset and scale over. If ``'auto'`` (the default), share over all axes except for the second: this will share scales over the minibatch dimension for dense layers, and additionally over all spatial dimensions for convolutional layers. Examples -------- Assuming your training data exists in a 2D numpy ndarray called ``training_data``, you can use this function to scale input features to the [0, 1] range based on the training set statistics like so: >>> import lasagne >>> import numpy as np >>> training_data = np.random.standard_normal((100, 20)) >>> input_shape = (None, training_data.shape[1]) >>> l_in = lasagne.layers.InputLayer(input_shape) >>> offset = training_data.min(axis=0) >>> scale = training_data.max(axis=0) - training_data.min(axis=0) >>> l_std = standardize(l_in, offset, scale, shared_axes=0) Alternatively, to z-score your inputs based on training set statistics, you could set ``offset = training_data.mean(axis=0)`` and ``scale = training_data.std(axis=0)`` instead. 
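    As a sketch of that z-scoring variant, reusing the ``training_data``
    array and ``l_in`` layer from the example above (``l_zscore`` is just
    an illustrative name):

    >>> offset = training_data.mean(axis=0)
    >>> scale = training_data.std(axis=0)
    >>> l_zscore = standardize(l_in, offset, scale, shared_axes=0)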
""" # Subtract the offset layer = BiasLayer(layer, -offset, shared_axes) # Do not optimize the offset parameter layer.params[layer.b].remove('trainable') # Divide by the scale layer = ScaleLayer(layer, floatX(1.)/scale, shared_axes) # Do not optimize the scales parameter layer.params[layer.scales].remove('trainable') return layer class ExpressionLayer(Layer): """ This layer provides boilerplate for a custom layer that applies a simple transformation to the input. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. function : callable A function to be applied to the output of the previous layer. output_shape : None, callable, tuple, or 'auto' Specifies the output shape of this layer. If a tuple, this fixes the output shape for any input shape (the tuple can contain None if some dimensions may vary). If a callable, it should return the calculated output shape given the input shape. If None, the output shape is assumed to be the same as the input shape. If 'auto', an attempt will be made to automatically infer the correct output shape. Notes ----- An :class:`ExpressionLayer` that does not change the shape of the data (i.e., is constructed with the default setting of ``output_shape=None``) is functionally equivalent to a :class:`NonlinearityLayer`. Examples -------- >>> from lasagne.layers import InputLayer, ExpressionLayer >>> l_in = InputLayer((32, 100, 20)) >>> l1 = ExpressionLayer(l_in, lambda X: X.mean(-1), output_shape='auto') >>> l1.output_shape (32, 100) """ def __init__(self, incoming, function, output_shape=None, **kwargs): super(ExpressionLayer, self).__init__(incoming, **kwargs) if output_shape is None: self._output_shape = None elif output_shape == 'auto': self._output_shape = 'auto' elif hasattr(output_shape, '__call__'): self.get_output_shape_for = output_shape else: self._output_shape = tuple(output_shape) self.function = function def get_output_shape_for(self, input_shape): if self._output_shape is None: return input_shape elif self._output_shape is 'auto': input_shape = (0 if s is None else s for s in input_shape) X = theano.tensor.alloc(0, *input_shape) output_shape = self.function(X).shape.eval() output_shape = tuple(s if s else None for s in output_shape) return output_shape else: return self._output_shape def get_output_for(self, input, **kwargs): return self.function(input) class InverseLayer(MergeLayer): """ The :class:`InverseLayer` class performs inverse operations for a single layer of a neural network by applying the partial derivative of the layer to be inverted with respect to its input: transposed layer for a :class:`DenseLayer`, deconvolutional layer for :class:`Conv2DLayer`, :class:`Conv1DLayer`; or an unpooling layer for :class:`MaxPool2DLayer`. It is specially useful for building (convolutional) autoencoders with tied parameters. Note that if the layer to be inverted contains a nonlinearity and/or a bias, the :class:`InverseLayer` will include the derivative of that in its computation. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. layer : a :class:`Layer` instance or a tuple The layer with respect to which the instance of the :class:`InverseLayer` is inverse to. 
Examples -------- >>> import lasagne >>> from lasagne.layers import InputLayer, Conv2DLayer, DenseLayer >>> from lasagne.layers import InverseLayer >>> l_in = InputLayer((100, 3, 28, 28)) >>> l1 = Conv2DLayer(l_in, num_filters=16, filter_size=5) >>> l2 = DenseLayer(l1, num_units=20) >>> l_u2 = InverseLayer(l2, l2) # backprop through l2 >>> l_u1 = InverseLayer(l_u2, l1) # backprop through l1 """ def __init__(self, incoming, layer, **kwargs): super(InverseLayer, self).__init__( [incoming, layer, layer.input_layer], **kwargs) def get_output_shape_for(self, input_shapes): return input_shapes[2] def get_output_for(self, inputs, **kwargs): input, layer_out, layer_in = inputs return theano.grad(None, wrt=layer_in, known_grads={layer_out: input}) class TransformerLayer(MergeLayer): """ Spatial transformer layer The layer applies an affine transformation on the input. The affine transformation is parameterized with six learned parameters [1]_. The output is interpolated with a bilinear transformation. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 4D tensor, with shape ``(batch_size, num_input_channels, input_rows, input_columns)``. localization_network : a :class:`Layer` instance The network that calculates the parameters of the affine transformation. See the example for how to initialize to the identity transform. downsample_factor : float or iterable of float A float or a 2-element tuple specifying the downsample factor for the output image (in both spatial dimensions). A value of 1 will keep the original size of the input. Values larger than 1 will downsample the input. Values below 1 will upsample the input. border_mode : 'nearest', 'mirror', or 'wrap' Determines how border conditions are handled during interpolation. If 'nearest', points outside the grid are clipped to the boundary. If 'mirror', points are mirrored across the boundary. If 'wrap', points wrap around to the other side of the grid. See http://stackoverflow.com/q/22669252/22670830#22670830 for details. References ---------- .. [1] Max Jaderberg, Karen Simonyan, Andrew Zisserman, Koray Kavukcuoglu (2015): Spatial Transformer Networks. NIPS 2015, http://papers.nips.cc/paper/5854-spatial-transformer-networks.pdf Examples -------- Here we set up the layer to initially do the identity transform, similarly to [1]_. Note that you will want to use a localization with linear output. If the output from the localization networks is [t1, t2, t3, t4, t5, t6] then t1 and t5 determines zoom, t2 and t4 determines skewness, and t3 and t6 move the center position. >>> import numpy as np >>> import lasagne >>> b = np.zeros((2, 3), dtype='float32') >>> b[0, 0] = 1 >>> b[1, 1] = 1 >>> b = b.flatten() # identity transform >>> W = lasagne.init.Constant(0.0) >>> l_in = lasagne.layers.InputLayer((None, 3, 28, 28)) >>> l_loc = lasagne.layers.DenseLayer(l_in, num_units=6, W=W, b=b, ... 
nonlinearity=None) >>> l_trans = lasagne.layers.TransformerLayer(l_in, l_loc) """ def __init__(self, incoming, localization_network, downsample_factor=1, border_mode='nearest', **kwargs): super(TransformerLayer, self).__init__( [incoming, localization_network], **kwargs) self.downsample_factor = as_tuple(downsample_factor, 2) self.border_mode = border_mode input_shp, loc_shp = self.input_shapes if loc_shp[-1] != 6 or len(loc_shp) != 2: raise ValueError("The localization network must have " "output shape: (batch_size, 6)") if len(input_shp) != 4: raise ValueError("The input network must have a 4-dimensional " "output shape: (batch_size, num_input_channels, " "input_rows, input_columns)") def get_output_shape_for(self, input_shapes): shape = input_shapes[0] factors = self.downsample_factor return (shape[:2] + tuple(None if s is None else int(s // f) for s, f in zip(shape[2:], factors))) def get_output_for(self, inputs, **kwargs): # see eq. (1) and sec 3.1 in [1] input, theta = inputs return _transform_affine(theta, input, self.downsample_factor, self.border_mode) def _transform_affine(theta, input, downsample_factor, border_mode): num_batch, num_channels, height, width = input.shape theta = T.reshape(theta, (-1, 2, 3)) # grid of (x_t, y_t, 1), eq (1) in ref [1] out_height = T.cast(height // downsample_factor[0], 'int64') out_width = T.cast(width // downsample_factor[1], 'int64') grid = _meshgrid(out_height, out_width) # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) T_g = T.dot(theta, grid) x_s = T_g[:, 0] y_s = T_g[:, 1] x_s_flat = x_s.flatten() y_s_flat = y_s.flatten() # dimshuffle input to (bs, height, width, channels) input_dim = input.dimshuffle(0, 2, 3, 1) input_transformed = _interpolate( input_dim, x_s_flat, y_s_flat, out_height, out_width, border_mode) output = T.reshape( input_transformed, (num_batch, out_height, out_width, num_channels)) output = output.dimshuffle(0, 3, 1, 2) # dimshuffle to conv format return output def _interpolate(im, x, y, out_height, out_width, border_mode): # *_f are floats num_batch, height, width, channels = im.shape height_f = T.cast(height, theano.config.floatX) width_f = T.cast(width, theano.config.floatX) # scale coordinates from [-1, 1] to [0, width/height - 1] x = (x + 1) / 2 * (width_f - 1) y = (y + 1) / 2 * (height_f - 1) # obtain indices of the 2x2 pixel neighborhood surrounding the coordinates; # we need those in floatX for interpolation and in int64 for indexing. x0_f = T.floor(x) y0_f = T.floor(y) x1_f = x0_f + 1 y1_f = y0_f + 1 # for indexing, we need to take care of the border mode for outside pixels. if border_mode == 'nearest': x0 = T.clip(x0_f, 0, width_f - 1) x1 = T.clip(x1_f, 0, width_f - 1) y0 = T.clip(y0_f, 0, height_f - 1) y1 = T.clip(y1_f, 0, height_f - 1) elif border_mode == 'mirror': w = 2 * (width_f - 1) x0 = T.minimum(x0_f % w, -x0_f % w) x1 = T.minimum(x1_f % w, -x1_f % w) h = 2 * (height_f - 1) y0 = T.minimum(y0_f % h, -y0_f % h) y1 = T.minimum(y1_f % h, -y1_f % h) elif border_mode == 'wrap': x0 = T.mod(x0_f, width_f) x1 = T.mod(x1_f, width_f) y0 = T.mod(y0_f, height_f) y1 = T.mod(y1_f, height_f) else: raise ValueError("border_mode must be one of " "'nearest', 'mirror', 'wrap'") x0, x1, y0, y1 = (T.cast(v, 'int64') for v in (x0, x1, y0, y1)) # The input is [num_batch, height, width, channels]. We do the lookup in # the flattened input, i.e [num_batch*height*width, channels]. 
We need # to offset all indices to match the flat version dim2 = width dim1 = width*height base = T.repeat( T.arange(num_batch, dtype='int64')*dim1, out_height*out_width) base_y0 = base + y0*dim2 base_y1 = base + y1*dim2 idx_a = base_y0 + x0 idx_b = base_y1 + x0 idx_c = base_y0 + x1 idx_d = base_y1 + x1 # use indices to lookup pixels for all samples im_flat = im.reshape((-1, channels)) Ia = im_flat[idx_a] Ib = im_flat[idx_b] Ic = im_flat[idx_c] Id = im_flat[idx_d] # calculate interpolated values wa = ((x1_f-x) * (y1_f-y)).dimshuffle(0, 'x') wb = ((x1_f-x) * (y-y0_f)).dimshuffle(0, 'x') wc = ((x-x0_f) * (y1_f-y)).dimshuffle(0, 'x') wd = ((x-x0_f) * (y-y0_f)).dimshuffle(0, 'x') output = T.sum([wa*Ia, wb*Ib, wc*Ic, wd*Id], axis=0) return output def _linspace(start, stop, num): # Theano linspace. Behaves similar to np.linspace start = T.cast(start, theano.config.floatX) stop = T.cast(stop, theano.config.floatX) num = T.cast(num, theano.config.floatX) step = (stop-start)/(num-1) return T.arange(num, dtype=theano.config.floatX)*step+start def _meshgrid(height, width): # This function is the grid generator from eq. (1) in reference [1]. # It is equivalent to the following numpy code: # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), # np.linspace(-1, 1, height)) # ones = np.ones(np.prod(x_t.shape)) # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) # It is implemented in Theano instead to support symbolic grid sizes. # Note: If the image size is known at layer construction time, we could # compute the meshgrid offline in numpy instead of doing it dynamically # in Theano. However, it hardly affected performance when we tried. x_t = T.dot(T.ones((height, 1)), _linspace(-1.0, 1.0, width).dimshuffle('x', 0)) y_t = T.dot(_linspace(-1.0, 1.0, height).dimshuffle(0, 'x'), T.ones((1, width))) x_t_flat = x_t.reshape((1, -1)) y_t_flat = y_t.reshape((1, -1)) ones = T.ones_like(x_t_flat) grid = T.concatenate([x_t_flat, y_t_flat, ones], axis=0) return grid class TPSTransformerLayer(MergeLayer): """ Spatial transformer layer The layer applies a thin plate spline transformation [2]_ on the input as in [1]_. The thin plate spline transform is determined based on the movement of some number of control points. The starting positions for these control points are fixed. The output is interpolated with a bilinear transformation. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 4D tensor, with shape ``(batch_size, num_input_channels, input_rows, input_columns)``. localization_network : a :class:`Layer` instance The network that calculates the parameters of the thin plate spline transformation as the x and y coordinates of the destination offsets of each control point. The output of the localization network should be a 2D tensor, with shape ``(batch_size, 2 * num_control_points)`` downsample_factor : float or iterable of float A float or a 2-element tuple specifying the downsample factor for the output image (in both spatial dimensions). A value of 1 will keep the original size of the input. Values larger than 1 will downsample the input. Values below 1 will upsample the input. control_points : integer The number of control points to be used for the thin plate spline transformation. These points will be arranged as a grid along the image, so the value must be a perfect square. Default is 16. 
precompute_grid : 'auto' or boolean Flag to precompute the U function [2]_ for the grid and source points. If 'auto', will be set to true as long as the input height and width are specified. If true, the U function is computed when the layer is constructed for a fixed input shape. If false, the grid will be computed as part of the Theano computational graph, which is substantially slower as this computation scales with num_pixels*num_control_points. Default is 'auto'. border_mode : 'nearest', 'mirror', or 'wrap' Determines how border conditions are handled during interpolation. If 'nearest', points outside the grid are clipped to the boundary. If 'mirror', points are mirrored across the boundary. If 'wrap', points wrap around to the other side of the grid. See http://stackoverflow.com/q/22669252/22670830#22670830 for details. References ---------- .. [1] Max Jaderberg, Karen Simonyan, Andrew Zisserman, Koray Kavukcuoglu (2015): Spatial Transformer Networks. NIPS 2015, http://papers.nips.cc/paper/5854-spatial-transformer-networks.pdf .. [2] Fred L. Bookstein (1989): Principal warps: thin-plate splines and the decomposition of deformations. IEEE Transactions on Pattern Analysis and Machine Intelligence. http://doi.org/10.1109/34.24792 Examples -------- Here, we'll implement an identity transform using a thin plate spline transform. First we'll create the destination control point offsets. To make everything invariant to the shape of the image, the x and y range of the image is normalized to [-1, 1] as in ref [1]_. To replicate an identity transform, we'll set the bias to have all offsets be 0. More complicated transformations can easily be implemented using different x and y offsets (importantly, each control point can have its own pair of offsets). >>> import numpy as np >>> import lasagne >>> >>> # Create the network >>> # we'll initialize the weights and biases to zero, so it starts >>> # as the identity transform (all control point offsets are zero) >>> W = b = lasagne.init.Constant(0.0) >>> >>> # Set the number of points >>> num_points = 16 >>> >>> l_in = lasagne.layers.InputLayer((None, 3, 28, 28)) >>> l_loc = lasagne.layers.DenseLayer(l_in, num_units=2*num_points, ... W=W, b=b, nonlinearity=None) >>> l_trans = lasagne.layers.TPSTransformerLayer(l_in, l_loc, ...
control_points=num_points) """ def __init__(self, incoming, localization_network, downsample_factor=1, control_points=16, precompute_grid='auto', border_mode='nearest', **kwargs): super(TPSTransformerLayer, self).__init__( [incoming, localization_network], **kwargs) self.border_mode = border_mode self.downsample_factor = as_tuple(downsample_factor, 2) self.control_points = control_points input_shp, loc_shp = self.input_shapes # Error checking if loc_shp[-1] != 2 * control_points or len(loc_shp) != 2: raise ValueError("The localization network must have " "output shape: (batch_size, " "2*control_points)") if round(np.sqrt(control_points)) != np.sqrt( control_points): raise ValueError("The number of control points must be" " a perfect square.") if len(input_shp) != 4: raise ValueError("The input network must have a 4-dimensional " "output shape: (batch_size, num_input_channels, " "input_rows, input_columns)") # Process precompute grid can_precompute_grid = all(s is not None for s in input_shp[2:]) if precompute_grid == 'auto': precompute_grid = can_precompute_grid elif precompute_grid and not can_precompute_grid: raise ValueError("Grid can only be precomputed if the input " "height and width are pre-specified.") self.precompute_grid = precompute_grid # Create source points and L matrix self.right_mat, self.L_inv, self.source_points, self.out_height, \ self.out_width = _initialize_tps( control_points, input_shp, self.downsample_factor, precompute_grid) def get_output_shape_for(self, input_shapes): shape = input_shapes[0] factors = self.downsample_factor return (shape[:2] + tuple(None if s is None else int(s // f) for s, f in zip(shape[2:], factors))) def get_output_for(self, inputs, **kwargs): # see eq. (1) and sec 3.1 in [1] # Get input and destination control points input, dest_offsets = inputs return _transform_thin_plate_spline( dest_offsets, input, self.right_mat, self.L_inv, self.source_points, self.out_height, self.out_width, self.precompute_grid, self.downsample_factor, self.border_mode) def _transform_thin_plate_spline( dest_offsets, input, right_mat, L_inv, source_points, out_height, out_width, precompute_grid, downsample_factor, border_mode): num_batch, num_channels, height, width = input.shape num_control_points = source_points.shape[1] # reshape destination offsets to be (num_batch, 2, num_control_points) # and add to source_points dest_points = source_points + T.reshape( dest_offsets, (num_batch, 2, num_control_points)) # Solve as in ref [2] coefficients = T.dot(dest_points, L_inv[:, 3:].T) if precompute_grid: # Transform each point on the source grid (image_size x image_size) right_mat = T.tile(right_mat.dimshuffle('x', 0, 1), (num_batch, 1, 1)) transformed_points = T.batched_dot(coefficients, right_mat) else: # Transformed grid out_height = T.cast(height // downsample_factor[0], 'int64') out_width = T.cast(width // downsample_factor[1], 'int64') orig_grid = _meshgrid(out_height, out_width) orig_grid = orig_grid[0:2, :] orig_grid = T.tile(orig_grid, (num_batch, 1, 1)) # Transform each point on the source grid (image_size x image_size) transformed_points = _get_transformed_points_tps( orig_grid, source_points, coefficients, num_control_points, num_batch) # Get out new points x_transformed = transformed_points[:, 0].flatten() y_transformed = transformed_points[:, 1].flatten() # dimshuffle input to (bs, height, width, channels) input_dim = input.dimshuffle(0, 2, 3, 1) input_transformed = _interpolate( input_dim, x_transformed, y_transformed, out_height, out_width, border_mode) 
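    # _interpolate returns the sampled values as a flat
    # (num_batch * out_height * out_width, channels) tensor; below it is
    # reshaped back to (batch, height, width, channels) and then
    # dimshuffled into the bc01 layout used by the rest of the network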
output = T.reshape(input_transformed, (num_batch, out_height, out_width, num_channels)) output = output.dimshuffle(0, 3, 1, 2) # dimshuffle to conv format return output def _get_transformed_points_tps(new_points, source_points, coefficients, num_points, batch_size): """ Calculates the transformed points' value using the provided coefficients :param new_points: num_batch x 2 x num_to_transform tensor :param source_points: 2 x num_points array of source points :param coefficients: coefficients (should be shape (num_batch, 2, control_points + 3)) :param num_points: the number of points :return: the x and y coordinates of each transformed point. Shape (num_batch, 2, num_to_transform) """ # Calculate the U function for the new point and each source point as in # ref [2] # The U function is simply U(r) = r^2 * log(r^2), where r^2 is the # squared distance # Calculate the squared dist between the new point and the source points to_transform = new_points.dimshuffle(0, 'x', 1, 2) stacked_transform = T.tile(to_transform, (1, num_points, 1, 1)) r_2 = T.sum(((stacked_transform - source_points.dimshuffle( 'x', 1, 0, 'x')) ** 2), axis=2) # Take the product (r^2 * log(r^2)), setting U(0) = 0 to avoid NaNs log_r_2 = T.log(r_2) distances = T.switch(T.isinf(log_r_2), 0., r_2 * log_r_2) # Add in the coefficients for the affine translation (1, x, and y, # corresponding to a_1, a_x, and a_y) upper_array = T.concatenate([T.ones((batch_size, 1, new_points.shape[2]), dtype=theano.config.floatX), new_points], axis=1) right_mat = T.concatenate([upper_array, distances], axis=1) # Calculate the new value as the dot product new_value = T.batched_dot(coefficients, right_mat) return new_value def _U_func_numpy(x1, y1, x2, y2): """ Function which implements the U function from the Bookstein paper :param x1: x coordinate of the first point :param y1: y coordinate of the first point :param x2: x coordinate of the second point :param y2: y coordinate of the second point :return: value of z """ # Return zero if same point if x1 == x2 and y1 == y2: return 0. # Calculate the squared Euclidean norm (r^2) r_2 = (x2 - x1) ** 2 + (y2 - y1) ** 2 # Return the squared norm (r^2 * log r^2) return r_2 * np.log(r_2) def _initialize_tps(num_control_points, input_shape, downsample_factor, precompute_grid): """ Initializes the thin plate spline calculation by creating the source point array and the inverted L matrix used for calculating the transformations as in ref [2]_ :param num_control_points: the number of control points. Must be a perfect square. Points will be used to generate an evenly spaced grid.
:param input_shape: tuple with 4 elements specifying the input shape :param downsample_factor: tuple with 2 elements specifying the downsample for the height and width, respectively :param precompute_grid: boolean specifying whether to precompute the grid matrix :return: right_mat: shape (num_control_points + 3, out_height*out_width) tensor L_inv: shape (num_control_points + 3, num_control_points + 3) tensor source_points: shape (2, num_control_points) tensor out_height: tensor constant specifying the ouptut height out_width: tensor constant specifying the output width """ # break out input_shape _, _, height, width = input_shape # Create source grid grid_size = np.sqrt(num_control_points) x_control_source, y_control_source = np.meshgrid( np.linspace(-1, 1, grid_size), np.linspace(-1, 1, grid_size)) # Create 2 x num_points array of source points source_points = np.vstack( (x_control_source.flatten(), y_control_source.flatten())) # Convert to floatX source_points = source_points.astype(theano.config.floatX) # Get number of equations num_equations = num_control_points + 3 # Initialize L to be num_equations square matrix L = np.zeros((num_equations, num_equations), dtype=theano.config.floatX) # Create P matrix components L[0, 3:num_equations] = 1. L[1:3, 3:num_equations] = source_points L[3:num_equations, 0] = 1. L[3:num_equations, 1:3] = source_points.T # Loop through each pair of points and create the K matrix for point_1 in range(num_control_points): for point_2 in range(point_1, num_control_points): L[point_1 + 3, point_2 + 3] = _U_func_numpy( source_points[0, point_1], source_points[1, point_1], source_points[0, point_2], source_points[1, point_2]) if point_1 != point_2: L[point_2 + 3, point_1 + 3] = L[point_1 + 3, point_2 + 3] # Invert L_inv = np.linalg.inv(L) if precompute_grid: # Construct grid out_height = np.array(height // downsample_factor[0]).astype('int64') out_width = np.array(width // downsample_factor[1]).astype('int64') x_t, y_t = np.meshgrid(np.linspace(-1, 1, out_width), np.linspace(-1, 1, out_height)) ones = np.ones(np.prod(x_t.shape)) orig_grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) orig_grid = orig_grid[0:2, :] orig_grid = orig_grid.astype(theano.config.floatX) # Construct right mat # First Calculate the U function for the new point and each source # point as in ref [2] # The U function is simply U(r) = r^2 * log(r^2), where r^2 is the # squared distance to_transform = orig_grid[:, :, np.newaxis].transpose(2, 0, 1) stacked_transform = np.tile(to_transform, (num_control_points, 1, 1)) stacked_source_points = \ source_points[:, :, np.newaxis].transpose(1, 0, 2) r_2 = np.sum((stacked_transform - stacked_source_points) ** 2, axis=1) # Take the product (r^2 * log(r^2)), being careful to avoid NaNs log_r_2 = np.log(r_2) log_r_2[np.isinf(log_r_2)] = 0. 
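        # with the infinite entries zeroed, the product below is 0 wherever
        # r_2 == 0, matching the U(0) = 0 convention of _U_func_numpy above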
distances = r_2 * log_r_2 # Add in the coefficients for the affine translation (1, x, and y, # corresponding to a_1, a_x, and a_y) upper_array = np.ones(shape=(1, orig_grid.shape[1]), dtype=theano.config.floatX) upper_array = np.concatenate([upper_array, orig_grid], axis=0) right_mat = np.concatenate([upper_array, distances], axis=0) # Convert to tensors out_height = T.as_tensor_variable(out_height) out_width = T.as_tensor_variable(out_width) right_mat = T.as_tensor_variable(right_mat) else: out_height = None out_width = None right_mat = None # Convert to tensors L_inv = T.as_tensor_variable(L_inv) source_points = T.as_tensor_variable(source_points) return right_mat, L_inv, source_points, out_height, out_width class ParametricRectifierLayer(Layer): """ lasagne.layers.ParametricRectifierLayer(incoming, alpha=init.Constant(0.25), shared_axes='auto', **kwargs) A layer that applies parametric rectify nonlinearity to its input following [1]_. Equation for the parametric rectifier linear unit: :math:`\\varphi(x) = \\max(x,0) + \\alpha \\min(x,0)` Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape alpha : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the alpha values. The shape must match the incoming shape, skipping those axes the alpha values are shared over (see the example below). See :func:`lasagne.utils.create_param` for more information. shared_axes : 'auto', 'all', int or tuple of int The axes along which the parameters of the rectifier units are going to be shared. If ``'auto'`` (the default), share over all axes except for the second - this will share the parameter over the minibatch dimension for dense layers, and additionally over all spatial dimensions for convolutional layers. If ``'all'``, share over all axes, which corresponds to a single scalar parameter. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. References ---------- .. [1] K He, X Zhang et al. (2015): Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification, http://arxiv.org/abs/1502.01852 Notes ----- The alpha parameter dimensionality is the input dimensionality minus the number of axes it is shared over, which matches the same convention as the :class:`BiasLayer`. 
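With the default ``shared_axes='auto'`` and a 4D input, a single alpha is
learned per channel; a small sketch (the input shape is only for
illustration):

>>> import lasagne
>>> l_in = lasagne.layers.InputLayer((None, 3, 28, 28))
>>> layer = ParametricRectifierLayer(l_in)
>>> layer.alpha.get_value().shape
(3,)

Sharing over a different set of axes keeps the sizes of the remaining axes
instead, for instance: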
>>> layer = ParametricRectifierLayer((20, 3, 28, 28), shared_axes=(0, 3)) >>> layer.alpha.get_value().shape (3, 28) """ def __init__(self, incoming, alpha=init.Constant(0.25), shared_axes='auto', **kwargs): super(ParametricRectifierLayer, self).__init__(incoming, **kwargs) if shared_axes == 'auto': self.shared_axes = (0,) + tuple(range(2, len(self.input_shape))) elif shared_axes == 'all': self.shared_axes = tuple(range(len(self.input_shape))) elif isinstance(shared_axes, int_types): self.shared_axes = (shared_axes,) else: self.shared_axes = shared_axes shape = [size for axis, size in enumerate(self.input_shape) if axis not in self.shared_axes] if any(size is None for size in shape): raise ValueError("ParametricRectifierLayer needs input sizes for " "all axes that alpha's are not shared over.") self.alpha = self.add_param(alpha, shape, name="alpha", regularizable=False) def get_output_for(self, input, **kwargs): axes = iter(range(self.alpha.ndim)) pattern = ['x' if input_axis in self.shared_axes else next(axes) for input_axis in range(input.ndim)] alpha = self.alpha.dimshuffle(pattern) return theano.tensor.nnet.relu(input, alpha) def prelu(layer, **kwargs): """ Convenience function to apply parametric rectify to a given layer's output. Will set the layer's nonlinearity to identity if there is one and will apply the parametric rectifier instead. Parameters ---------- layer: a :class:`Layer` instance The `Layer` instance to apply the parametric rectifier layer to; note that it will be irreversibly modified as specified above **kwargs Any additional keyword arguments are passed to the :class:`ParametericRectifierLayer` Examples -------- Note that this function modifies an existing layer, like this: >>> from lasagne.layers import InputLayer, DenseLayer, prelu >>> layer = InputLayer((32, 100)) >>> layer = DenseLayer(layer, num_units=200) >>> layer = prelu(layer) In particular, :func:`prelu` can *not* be passed as a nonlinearity. """ nonlinearity = getattr(layer, 'nonlinearity', None) if nonlinearity is not None: layer.nonlinearity = nonlinearities.identity return ParametricRectifierLayer(layer, **kwargs) class RandomizedRectifierLayer(Layer): """ A layer that applies a randomized leaky rectify nonlinearity to its input. The randomized leaky rectifier was first proposed and used in the Kaggle NDSB Competition, and later evaluated in [1]_. Compared to the standard leaky rectifier :func:`leaky_rectify`, it has a randomly sampled slope for negative input during training, and a fixed slope during evaluation. Equation for the randomized rectifier linear unit during training: :math:`\\varphi(x) = \\max((\\sim U(lower, upper)) \\cdot x, x)` During evaluation, the factor is fixed to the arithmetic mean of `lower` and `upper`. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape lower : Theano shared variable, expression, or constant The lower bound for the randomly chosen slopes. upper : Theano shared variable, expression, or constant The upper bound for the randomly chosen slopes. shared_axes : 'auto', 'all', int or tuple of int The axes along which the random slopes of the rectifier units are going to be shared. If ``'auto'`` (the default), share over all axes except for the second - this will share the random slope over the minibatch dimension for dense layers, and additionally over all spatial dimensions for convolutional layers. If ``'all'``, share over all axes, thus using a single random slope. 
**kwargs Any additional keyword arguments are passed to the `Layer` superclass. References ---------- .. [1] Bing Xu, Naiyan Wang et al. (2015): Empirical Evaluation of Rectified Activations in Convolutional Network, http://arxiv.org/abs/1505.00853 """ def __init__(self, incoming, lower=0.3, upper=0.8, shared_axes='auto', **kwargs): super(RandomizedRectifierLayer, self).__init__(incoming, **kwargs) self._srng = RandomStreams(get_rng().randint(1, 2147462579)) self.lower = lower self.upper = upper if not isinstance(lower > upper, theano.Variable) and lower > upper: raise ValueError("Upper bound for RandomizedRectifierLayer needs " "to be higher than lower bound.") if shared_axes == 'auto': self.shared_axes = (0,) + tuple(range(2, len(self.input_shape))) elif shared_axes == 'all': self.shared_axes = tuple(range(len(self.input_shape))) elif isinstance(shared_axes, int_types): self.shared_axes = (shared_axes,) else: self.shared_axes = shared_axes def get_output_for(self, input, deterministic=False, **kwargs): """ Parameters ---------- input : tensor output from the previous layer deterministic : bool If true, the arithmetic mean of lower and upper are used for the leaky slope. """ if deterministic or self.upper == self.lower: return theano.tensor.nnet.relu(input, (self.upper+self.lower)/2.0) else: shape = list(self.input_shape) if any(s is None for s in shape): shape = list(input.shape) for ax in self.shared_axes: shape[ax] = 1 rnd = self._srng.uniform(tuple(shape), low=self.lower, high=self.upper, dtype=theano.config.floatX) rnd = theano.tensor.addbroadcast(rnd, *self.shared_axes) return theano.tensor.nnet.relu(input, rnd) def rrelu(layer, **kwargs): """ Convenience function to apply randomized rectify to a given layer's output. Will set the layer's nonlinearity to identity if there is one and will apply the randomized rectifier instead. Parameters ---------- layer: a :class:`Layer` instance The `Layer` instance to apply the randomized rectifier layer to; note that it will be irreversibly modified as specified above **kwargs Any additional keyword arguments are passed to the :class:`RandomizedRectifierLayer` Examples -------- Note that this function modifies an existing layer, like this: >>> from lasagne.layers import InputLayer, DenseLayer, rrelu >>> layer = InputLayer((32, 100)) >>> layer = DenseLayer(layer, num_units=200) >>> layer = rrelu(layer) In particular, :func:`rrelu` can *not* be passed as a nonlinearity. """ nonlinearity = getattr(layer, 'nonlinearity', None) if nonlinearity is not None: layer.nonlinearity = nonlinearities.identity return RandomizedRectifierLayer(layer, **kwargs) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/embedding.py0000644000175000017500000000416613307306052026267 0ustar sinclairssinclairsimport numpy as np import theano.tensor as T from .. import init from .base import Layer __all__ = [ "EmbeddingLayer" ] class EmbeddingLayer(Layer): """ lasagne.layers.EmbeddingLayer(incoming, input_size, output_size, W=lasagne.init.Normal(), **kwargs) A layer for word embeddings. The input should be an integer type Tensor variable. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. input_size: int The Number of different embeddings. The last embedding will have index input_size - 1. output_size : int The size of each embedding. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the embedding matrix. 
This should be a matrix with shape ``(input_size, output_size)``. See :func:`lasagne.utils.create_param` for more information. Examples -------- >>> from lasagne.layers import EmbeddingLayer, InputLayer, get_output >>> import theano >>> x = T.imatrix() >>> l_in = InputLayer((3, )) >>> W = np.arange(3*5).reshape((3, 5)).astype('float32') >>> l1 = EmbeddingLayer(l_in, input_size=3, output_size=5, W=W) >>> output = get_output(l1, x) >>> f = theano.function([x], output) >>> x_test = np.array([[0, 2], [1, 2]]).astype('int32') >>> f(x_test) array([[[ 0., 1., 2., 3., 4.], [ 10., 11., 12., 13., 14.]], [[ 5., 6., 7., 8., 9.], [ 10., 11., 12., 13., 14.]]], dtype=float32) """ def __init__(self, incoming, input_size, output_size, W=init.Normal(), **kwargs): super(EmbeddingLayer, self).__init__(incoming, **kwargs) self.input_size = input_size self.output_size = output_size self.W = self.add_param(W, (input_size, output_size), name="W") def get_output_shape_for(self, input_shape): return input_shape + (self.output_size, ) def get_output_for(self, input, **kwargs): return self.W[input] Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/noise.py0000644000175000017500000001703513307306052025465 0ustar sinclairssinclairsimport theano import theano.tensor as T from .base import Layer from ..random import get_rng from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams __all__ = [ "DropoutLayer", "dropout", "dropout_channels", "spatial_dropout", "dropout_locations", "GaussianNoiseLayer", ] class DropoutLayer(Layer): """Dropout layer Sets values to zero with probability p. See notes for disabling dropout during testing. Parameters ---------- incoming : a :class:`Layer` instance or a tuple the layer feeding into this layer, or the expected input shape p : float or scalar tensor The probability of setting a value to zero rescale : bool If ``True`` (the default), scale the input by ``1 / (1 - p)`` when dropout is enabled, to keep the expected output mean the same. shared_axes : tuple of int Axes to share the dropout mask over. By default, each value can be dropped individually. ``shared_axes=(0,)`` uses the same mask across the batch. ``shared_axes=(2, 3)`` uses the same mask across the spatial dimensions of 2D feature maps. Notes ----- The dropout layer is a regularizer that randomly sets input values to zero; see [1]_, [2]_ for why this might improve generalization. The behaviour of the layer depends on the ``deterministic`` keyword argument passed to :func:`lasagne.layers.get_output`. If ``True``, the layer behaves deterministically, and passes on the input unchanged. If ``False`` or not specified, dropout (and possibly scaling) is enabled. Usually, you would use ``deterministic=False`` at train time and ``deterministic=True`` at test time. See also -------- dropout_channels : Drops full channels of feature maps spatial_dropout : Alias for :func:`dropout_channels` dropout_locations : Drops full pixels or voxels of feature maps References ---------- .. [1] Hinton, G., Srivastava, N., Krizhevsky, A., Sutskever, I., Salakhutdinov, R. R. (2012): Improving neural networks by preventing co-adaptation of feature detectors. arXiv preprint arXiv:1207.0580. .. [2] Srivastava Nitish, Hinton, G., Krizhevsky, A., Sutskever, I., & Salakhutdinov, R. R. (2014): Dropout: A Simple Way to Prevent Neural Networks from Overfitting. Journal of Machine Learning Research, 5(Jun)(2), 1929-1958. 
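    Examples
    --------
    A brief sketch of the usual train/test usage (layer names are only for
    illustration):

    >>> from lasagne.layers import InputLayer, DropoutLayer, get_output
    >>> l_in = InputLayer((None, 100))
    >>> l_drop = DropoutLayer(l_in, p=0.5)
    >>> train_out = get_output(l_drop)                     # dropout active
    >>> test_out = get_output(l_drop, deterministic=True)  # dropout disabled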
""" def __init__(self, incoming, p=0.5, rescale=True, shared_axes=(), **kwargs): super(DropoutLayer, self).__init__(incoming, **kwargs) self._srng = RandomStreams(get_rng().randint(1, 2147462579)) self.p = p self.rescale = rescale self.shared_axes = tuple(shared_axes) def get_output_for(self, input, deterministic=False, **kwargs): if deterministic or self.p == 0: return input else: # Using theano constant to prevent upcasting one = T.constant(1, dtype='int8') retain_prob = one - self.p if self.rescale: input /= retain_prob # use nonsymbolic shape for dropout mask if possible mask_shape = self.input_shape if any(s is None for s in mask_shape): mask_shape = input.shape # apply dropout, respecting shared axes if self.shared_axes: shared_axes = tuple(a if a >= 0 else a + input.ndim for a in self.shared_axes) mask_shape = tuple(1 if a in shared_axes else s for a, s in enumerate(mask_shape)) mask = self._srng.binomial(mask_shape, p=retain_prob, dtype=input.dtype) if self.shared_axes: bcast = tuple(bool(s == 1) for s in mask_shape) mask = T.patternbroadcast(mask, bcast) return input * mask dropout = DropoutLayer # shortcut def dropout_channels(incoming, *args, **kwargs): """ Convenience function to drop full channels of feature maps. Adds a :class:`DropoutLayer` that sets feature map channels to zero, across all locations, with probability p. For convolutional neural networks, this may give better results than independent dropout [1]_. Parameters ---------- incoming : a :class:`Layer` instance or a tuple the layer feeding into this layer, or the expected input shape *args, **kwargs Any additional arguments and keyword arguments are passed on to the :class:`DropoutLayer` constructor, except for `shared_axes`. Returns ------- layer : :class:`DropoutLayer` instance The dropout layer with `shared_axes` set to drop channels. References ---------- .. [1] J. Tompson, R. Goroshin, A. Jain, Y. LeCun, C. Bregler (2014): Efficient Object Localization Using Convolutional Networks. https://arxiv.org/abs/1411.4280 """ ndim = len(getattr(incoming, 'output_shape', incoming)) kwargs['shared_axes'] = tuple(range(2, ndim)) return DropoutLayer(incoming, *args, **kwargs) spatial_dropout = dropout_channels # alias def dropout_locations(incoming, *args, **kwargs): """ Convenience function to drop full locations of feature maps. Adds a :class:`DropoutLayer` that sets feature map locations (i.e., pixels or voxels) to zero, across all channels, with probability p. Parameters ---------- incoming : a :class:`Layer` instance or a tuple the layer feeding into this layer, or the expected input shape *args, **kwargs Any additional arguments and keyword arguments are passed on to the :class:`DropoutLayer` constructor, except for `shared_axes`. Returns ------- layer : :class:`DropoutLayer` instance The dropout layer with `shared_axes` set to drop locations. """ kwargs['shared_axes'] = (1,) return DropoutLayer(incoming, *args, **kwargs) class GaussianNoiseLayer(Layer): """Gaussian noise layer. Add zero-mean Gaussian noise of given standard deviation to the input [1]_. Parameters ---------- incoming : a :class:`Layer` instance or a tuple the layer feeding into this layer, or the expected input shape sigma : float or tensor scalar Standard deviation of added Gaussian noise Notes ----- The Gaussian noise layer is a regularizer. During training you should set deterministic to false and during testing you should set deterministic to true. References ---------- .. [1] K.-C. Jim, C. Giles, and B. 
Horne (1996): An analysis of noise in recurrent neural networks: convergence and generalization. IEEE Transactions on Neural Networks, 7(6):1424-1438. """ def __init__(self, incoming, sigma=0.1, **kwargs): super(GaussianNoiseLayer, self).__init__(incoming, **kwargs) self._srng = RandomStreams(get_rng().randint(1, 2147462579)) self.sigma = sigma def get_output_for(self, input, deterministic=False, **kwargs): """ Parameters ---------- input : tensor output from the previous layer deterministic : bool If true noise is disabled, see notes """ if deterministic or self.sigma == 0: return input else: return input + self._srng.normal(input.shape, avg=0.0, std=self.sigma) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/shape.py0000644000175000017500000003557013307306052025454 0ustar sinclairssinclairsimport numpy as np import theano.tensor as T from ..theano_extensions import padding from ..utils import int_types from .base import Layer __all__ = [ "FlattenLayer", "flatten", "ReshapeLayer", "reshape", "DimshuffleLayer", "dimshuffle", "PadLayer", "pad", "SliceLayer" ] class FlattenLayer(Layer): """ A layer that flattens its input. The leading ``outdim-1`` dimensions of the output will have the same shape as the input. The remaining dimensions are collapsed into the last dimension. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. outdim : int The number of dimensions in the output. See Also -------- flatten : Shortcut """ def __init__(self, incoming, outdim=2, **kwargs): super(FlattenLayer, self).__init__(incoming, **kwargs) self.outdim = outdim if outdim < 1: raise ValueError('Dim must be >0, was %i', outdim) def get_output_shape_for(self, input_shape): to_flatten = input_shape[self.outdim - 1:] if any(s is None for s in to_flatten): flattened = None else: flattened = int(np.prod(to_flatten)) return input_shape[:self.outdim - 1] + (flattened,) def get_output_for(self, input, **kwargs): return input.flatten(self.outdim) flatten = FlattenLayer # shortcut class ReshapeLayer(Layer): """ A layer reshaping its input tensor to another tensor of the same total number of elements. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape shape : tuple The target shape specification. Each element can be one of: * ``i``, a positive integer directly giving the size of the dimension * ``[i]``, a single-element list of int, denoting to use the size of the ``i`` th input dimension * ``-1``, denoting to infer the size for this dimension to match the total number of elements in the input tensor (cannot be used more than once in a specification) * TensorVariable directly giving the size of the dimension Examples -------- >>> from lasagne.layers import InputLayer, ReshapeLayer >>> l_in = InputLayer((32, 100, 20)) >>> l1 = ReshapeLayer(l_in, ((32, 50, 40))) >>> l1.output_shape (32, 50, 40) >>> l_in = InputLayer((None, 100, 20)) >>> l1 = ReshapeLayer(l_in, ([0], [1], 5, -1)) >>> l1.output_shape (None, 100, 5, 4) Notes ----- The tensor elements will be fetched and placed in C-like order. That is, reshaping `[1,2,3,4,5,6]` to shape `(2,3)` will result in a matrix `[[1,2,3],[4,5,6]]`, not in `[[1,3,5],[2,4,6]]` (Fortran-like order), regardless of the memory layout of the input tensor. For C-contiguous input, reshaping is cheap, for others it may require copying the data. 
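    A dimension can also be given as a Theano scalar, in which case its size
    is only known at run time and the corresponding entry of
    ``output_shape`` becomes ``None``; a small sketch:

    >>> import theano.tensor as T
    >>> from lasagne.layers import InputLayer, ReshapeLayer
    >>> l_in = InputLayer((None, 100, 20))
    >>> l1 = ReshapeLayer(l_in, ([0], T.iscalar(), -1))
    >>> l1.output_shape
    (None, None, None)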
""" def __init__(self, incoming, shape, **kwargs): super(ReshapeLayer, self).__init__(incoming, **kwargs) shape = tuple(shape) for s in shape: if isinstance(s, int_types): if s == 0 or s < - 1: raise ValueError("`shape` integers must be positive or -1") elif isinstance(s, list): if len(s) != 1 or not isinstance(s[0], int_types) or s[0] < 0: raise ValueError("`shape` input references must be " "single-element lists of int >= 0") elif isinstance(s, T.TensorVariable): if s.ndim != 0: raise ValueError( "A symbolic variable in a shape specification must be " "a scalar, but had %i dimensions" % s.ndim) else: raise ValueError("`shape` must be a tuple of int and/or [int]") if sum(s == -1 for s in shape) > 1: raise ValueError("`shape` cannot contain multiple -1") self.shape = shape # try computing the output shape once as a sanity check self.get_output_shape_for(self.input_shape) def get_output_shape_for(self, input_shape, **kwargs): # Initialize output shape from shape specification output_shape = list(self.shape) # First, replace all `[i]` with the corresponding input dimension, and # mask parts of the shapes thus becoming irrelevant for -1 inference masked_input_shape = list(input_shape) masked_output_shape = list(output_shape) for dim, o in enumerate(output_shape): if isinstance(o, list): if o[0] >= len(input_shape): raise ValueError("specification contains [%d], but input " "shape has %d dimensions only" % (o[0], len(input_shape))) output_shape[dim] = input_shape[o[0]] masked_output_shape[dim] = input_shape[o[0]] if (input_shape[o[0]] is None) \ and (masked_input_shape[o[0]] is None): # first time we copied this unknown input size: mask # it, we have a 1:1 correspondence between out[dim] and # in[o[0]] and can ignore it for -1 inference even if # it is unknown. masked_input_shape[o[0]] = 1 masked_output_shape[dim] = 1 # Secondly, replace all symbolic shapes with `None`, as we cannot # infer their size here. for dim, o in enumerate(output_shape): if isinstance(o, T.TensorVariable): output_shape[dim] = None masked_output_shape[dim] = None # From the shapes, compute the sizes of the input and output tensor input_size = (None if any(x is None for x in masked_input_shape) else np.prod(masked_input_shape)) output_size = (None if any(x is None for x in masked_output_shape) else np.prod(masked_output_shape)) del masked_input_shape, masked_output_shape # Finally, infer value for -1 if needed if -1 in output_shape: dim = output_shape.index(-1) if (input_size is None) or (output_size is None): output_shape[dim] = None output_size = None else: output_size *= -1 output_shape[dim] = input_size // output_size output_size *= output_shape[dim] # Sanity check if (input_size is not None) and (output_size is not None) \ and (input_size != output_size): raise ValueError("%s cannot be reshaped to specification %s. " "The total size mismatches." % (input_shape, self.shape)) return tuple(output_shape) def get_output_for(self, input, **kwargs): # Replace all `[i]` with the corresponding input dimension output_shape = list(self.shape) for dim, o in enumerate(output_shape): if isinstance(o, list): output_shape[dim] = input.shape[o[0]] # Everything else is handled by Theano return input.reshape(tuple(output_shape)) reshape = ReshapeLayer # shortcut class DimshuffleLayer(Layer): """ A layer that rearranges the dimension of its input tensor, maintaining the same same total number of elements. 
Parameters ---------- incoming : a :class:`Layer` instance or a tuple the layer feeding into this layer, or the expected input shape pattern : tuple The new dimension order, with each element giving the index of the dimension in the input tensor or `'x'` to broadcast it. For example `(3,2,1,0)` will reverse the order of a 4-dimensional tensor. Use `'x'` to broadcast, e.g. `(3,2,1,'x',0)` will take a 4 tensor of shape `(2,3,5,7)` as input and produce a tensor of shape `(7,5,3,1,2)` with the 4th dimension being broadcast-able. In general, all dimensions in the input tensor must be used to generate the output tensor. Omitting a dimension attempts to collapse it; this can only be done to broadcast-able dimensions, e.g. a 5-tensor of shape `(7,5,3,1,2)` with the 4th being broadcast-able can be shuffled with the pattern `(4,2,1,0)` collapsing the 4th dimension resulting in a tensor of shape `(2,3,5,7)`. Examples -------- >>> from lasagne.layers import InputLayer, DimshuffleLayer >>> l_in = InputLayer((2, 3, 5, 7)) >>> l1 = DimshuffleLayer(l_in, (3, 2, 1, 'x', 0)) >>> l1.output_shape (7, 5, 3, 1, 2) >>> l2 = DimshuffleLayer(l1, (4, 2, 1, 0)) >>> l2.output_shape (2, 3, 5, 7) """ def __init__(self, incoming, pattern, **kwargs): super(DimshuffleLayer, self).__init__(incoming, **kwargs) # Sanity check the pattern used_dims = set() for p in pattern: if isinstance(p, int_types): # Dimension p if p in used_dims: raise ValueError("pattern contains dimension {0} more " "than once".format(p)) used_dims.add(p) elif p == 'x': # Broadcast pass else: raise ValueError("pattern should only contain dimension" "indices or 'x', not {0}".format(p)) self.pattern = pattern # try computing the output shape once as a sanity check self.get_output_shape_for(self.input_shape) def get_output_shape_for(self, input_shape): # Build output shape while keeping track of the dimensions that we are # attempting to collapse, so we can ensure that they are broadcastable output_shape = [] dims_used = [False] * len(input_shape) for p in self.pattern: if isinstance(p, int_types): if p < 0 or p >= len(input_shape): raise ValueError("pattern contains {0}, but input shape " "has {1} dimensions " "only".format(p, len(input_shape))) # Dimension p o = input_shape[p] dims_used[p] = True elif p == 'x': # Broadcast; will be of size 1 o = 1 output_shape.append(o) for i, (dim_size, used) in enumerate(zip(input_shape, dims_used)): if not used and dim_size != 1 and dim_size is not None: raise ValueError( "pattern attempted to collapse dimension " "{0} of size {1}; dimensions with size != 1/None are not" "broadcastable and cannot be " "collapsed".format(i, dim_size)) return tuple(output_shape) def get_output_for(self, input, **kwargs): return input.dimshuffle(self.pattern) dimshuffle = DimshuffleLayer # shortcut class PadLayer(Layer): """ Pad all dimensions except the first ``batch_ndim`` with ``width`` zeros on both sides, or with another value specified in ``val``. Individual padding for each dimension or edge can be specified using a tuple or list of tuples for ``width``. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape width : int, iterable of int, or iterable of tuple Padding width. If an int, pads each axis symmetrically with the same amount in the beginning and end. If an iterable of int, defines the symmetric padding width separately for each axis. If an iterable of tuples of two ints, defines a seperate padding width for each beginning and end of each axis. 
val : float Value used for padding batch_ndim : int Dimensions up to this value are not padded. For padding convolutional layers this should be set to 2 so the sample and filter dimensions are not padded """ def __init__(self, incoming, width, val=0, batch_ndim=2, **kwargs): super(PadLayer, self).__init__(incoming, **kwargs) self.width = width self.val = val self.batch_ndim = batch_ndim def get_output_shape_for(self, input_shape): output_shape = list(input_shape) if isinstance(self.width, int_types): widths = [self.width] * (len(input_shape) - self.batch_ndim) else: widths = self.width for k, w in enumerate(widths): if output_shape[k + self.batch_ndim] is None: continue else: try: l, r = w except TypeError: l = r = w output_shape[k + self.batch_ndim] += l + r return tuple(output_shape) def get_output_for(self, input, **kwargs): return padding.pad(input, self.width, self.val, self.batch_ndim) pad = PadLayer # shortcut class SliceLayer(Layer): """ Slices the input at a specific axis and at specific indices. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape indices : int or slice instance If an ``int``, selects a single element from the given axis, dropping the axis. If a slice, selects all elements in the given range, keeping the axis. axis : int Specifies the axis from which the indices are selected. Examples -------- >>> from lasagne.layers import SliceLayer, InputLayer >>> l_in = InputLayer((2, 3, 4)) >>> SliceLayer(l_in, indices=0, axis=1).output_shape ... # equals input[:, 0] (2, 4) >>> SliceLayer(l_in, indices=slice(0, 1), axis=1).output_shape ... # equals input[:, 0:1] (2, 1, 4) >>> SliceLayer(l_in, indices=slice(-2, None), axis=-1).output_shape ... # equals input[..., -2:] (2, 3, 2) """ def __init__(self, incoming, indices, axis=-1, **kwargs): super(SliceLayer, self).__init__(incoming, **kwargs) self.slice = indices self.axis = axis def get_output_shape_for(self, input_shape): output_shape = list(input_shape) if isinstance(self.slice, int_types): del output_shape[self.axis] elif input_shape[self.axis] is not None: output_shape[self.axis] = len( range(*self.slice.indices(input_shape[self.axis]))) else: output_shape[self.axis] = None return tuple(output_shape) def get_output_for(self, input, **kwargs): axis = self.axis if axis < 0: axis += input.ndim return input[(slice(None),) * axis + (self.slice,)] Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/cuda_convnet.py0000644000175000017500000006660513307306052027027 0ustar sinclairssinclairsimport warnings import numpy as np import theano import theano.tensor as T from .. import init from .. import nonlinearities from .base import Layer from .conv import conv_output_length, BaseConvLayer from .pool import pool_output_length from ..utils import as_tuple # bail out if using Theano's new GPU backend try: from theano import gpuarray except ImportError: from theano.sandbox import gpuarray if gpuarray.pygpu_activated: raise ImportError("cuda_convnet is not supported under Theano's new " "GPU backend. 
Please either use the ordinary " "convolutional and pooling layers (they are usually " "faster), or use Theano's old GPU backend (available " "up to Theano 0.9).") # pragma: no cover from theano.sandbox.cuda.basic_ops import gpu_contiguous from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs __all__ = [ "Conv2DCCLayer", "MaxPool2DCCLayer", "ShuffleBC01ToC01BLayer", "bc01_to_c01b", "ShuffleC01BToBC01Layer", "c01b_to_bc01", "NINLayer_c01b", ] if not theano.sandbox.cuda.cuda_enabled: raise ImportError( "requires GPU support -- see http://lasagne.readthedocs.org/en/" "latest/user/installation.html#gpu-support") # pragma: no cover if theano.config.floatX == 'float64': warnings.warn("You are using a GPU layer with Theano configured for " "double precision (floatX=float64). Depending on your " "Theano version and GPU, this may be slow or unsupported. " "We recommend to configure Theano for single precision " "(floatX=float32); see http://lasagne.readthedocs.org/en/" "latest/user/installation.html#gpu-support.") class Conv2DCCLayer(BaseConvLayer): """ lasagne.layers.Conv2DCCLayer(incoming, num_filters, filter_size, stride=(1, 1), pad=0, untie_biases=False, W=None, b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, dimshuffle=True, flip_filters=False, partial_sum=1, **kwargs) 2D convolutional layer Performs a 2D convolution on its input and optionally adds a bias and applies an elementwise nonlinearity. This is an alternative implementation which uses the cuda-convnet wrappers from pylearn2: ``pylearn2.sandbox.cuda_convnet.filter_acts.FilterActs``. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. This layer expects a 4D tensor as its input, with shape ``(batch_size, num_input_channels, input_rows, input_columns)``. If automatic dimshuffling is disabled (see notes), the shape should be ``(num_input_channels, input_rows, input_columns, batch_size)`` instead (c01b axis order). num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 2-element tuple specifying the size of the filters. This layer does not support non-square filters. stride : int or iterable of int An integer or a 2-element tuple specifying the stride of the convolution operation. This layer does not support using different strides along both axes. pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0) By default, the convolution is only computed where the input and the filter fully overlap (a valid convolution). When ``stride=1``, this yields an output that is smaller than the input by ``filter_size - 1``. The `pad` argument allows you to implicitly pad the input with zeros, extending the output size. A single integer results in symmetric zero-padding of the given size on all borders. This layer does not support using different amounts of padding along both axes, but for compatibility to other layers you can still specify the padding as a tuple of two same-valued integers. ``'full'`` pads with one less than the filter size on both sides. This is equivalent to computing the convolution wherever the input and the filter overlap by at least one position. ``'same'`` pads with half the filter size (rounded down) on both sides. When ``stride=1`` this results in an output size equal to the input size. Even filter size is not supported. ``'valid'`` is an alias for ``0`` (no padding / a valid convolution). 
Note that ``'full'`` and ``'same'`` can be faster than equivalent integer values due to optimizations by Theano. untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). If ``True``, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a 3D tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a 4D tensor with shape ``(num_filters, num_input_channels, filter_rows, filter_columns)``. If automatic dimshuffling is disabled (see notes), the shape should be ``(num_input_channels, input_rows, input_columns, num_filters)`` instead (c01b axis order). See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, output_rows, output_columns)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. dimshuffle : bool (default: True) If ``True``, the layer will automatically apply the necessary dimshuffle operations to deal with the fact that the cuda-convnet implementation uses c01b (batch-size-last) axis order instead of bc01 (batch-size-first), which is the Lasagne/Theano default. This makes the layer interoperable with other Lasagne layers. If ``False``, this automatic dimshuffling is disabled and the layer will expect its input and parameters to have c01b axis order. It is up to the user to ensure this. :class:`ShuffleBC01ToC01BLayer` and :class:`ShuffleC01BToBC01Layer` can be used to convert between bc01 and c01b axis order. flip_filters : bool (default: False) Whether to flip the filters and perform a convolution, or not to flip them and perform a correlation. Flipping adds a bit of overhead, so it is disabled by default. In most cases this does not make a difference anyway because the filters are learnt. However, ``flip_filters`` should be set to ``True`` if weights are loaded into it that were learnt using a regular :class:`lasagne.layers.Conv2DLayer`, for example. partial_sum : int or None (default: 1) This value tunes the trade-off between memory usage and performance. You can specify any positive integer that is a divisor of the output feature map size (i.e. output rows times output columns). Higher values decrease memory usage, but also performance. Specifying 0 or ``None`` means the highest possible value will be used. The Lasagne default of ``1`` gives the best performance, but also the highest memory usage. More information about this parameter can be found in the `cuda-convnet documentation `_. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. 
Notes ----- The cuda-convnet convolution implementation has several limitations: * only square filters are supported. * only identical strides in the horizontal and vertical direction are supported. * the number of filters must be a multiple of 16. * the number of input channels must be even, or less than or equal to 3. * if the gradient w.r.t. the input is to be computed, the number of channels must be divisible by 4. * performance is optimal when the batch size is a multiple of 128 (but other batch sizes are supported). * this layer only works on the GPU. The cuda-convnet convolution implementation uses c01b (batch-size-last) axis order by default. The Theano/Lasagne default is bc01 (batch-size-first). This layer automatically adds the necessary dimshuffle operations for the input and the parameters so that it is interoperable with other layers that assume bc01 axis order. However, these additional dimshuffle operations may sometimes negatively affect performance. For this reason, it is possible to disable them by setting ``dimshuffle=False``. In this case, the user is expected to manually ensure that the input and parameters have the correct axis order. :class:`ShuffleBC01ToC01BLayer` and :class:`ShuffleC01BToBC01Layer` can be used to convert between bc01 and c01b axis order. """ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1), pad=0, untie_biases=False, W=None, b=init.Constant(0.), nonlinearity=nonlinearities.rectify, dimshuffle=True, flip_filters=False, partial_sum=1, **kwargs): if W is None: if dimshuffle: W = init.GlorotUniform() else: W = init.GlorotUniform(c01b=True) self.dimshuffle = dimshuffle super(Conv2DCCLayer, self).__init__(incoming, num_filters, filter_size, stride, pad, untie_biases, W, b, nonlinearity, flip_filters, n=2, **kwargs) self.partial_sum = partial_sum if self.filter_size[0] != self.filter_size[1]: raise RuntimeError("Conv2DCCLayer only supports square filters, " "but filter_size=(%d, %d)" % filter_size) if self.stride[0] != self.stride[1]: raise RuntimeError("Conv2DCCLayer only supports square strides, " "but stride=(%d, %d)" % stride) if self.num_filters % 16 != 0: raise RuntimeError("Conv2DCCLayer requires num_filters to be a " "multiple of 16, but num_filters is " "%d" % num_filters) if not (self.num_input_channels < 4 or self.num_input_channels % 4 == 0): raise RuntimeError("Conv2DCCLayer requires the number of input " "channels to be 1, 2, 3 or a multiple of 4, " "but it is %d" % self.num_input_channels) if isinstance(self.pad, tuple): if self.pad[0] != self.pad[1]: raise RuntimeError("Conv2DCCLayer only supports square " "padding, but pad=(%d, %d)" % pad) pad = self.pad[0] elif self.pad == 'same': pad = self.filter_size[0] // 2 elif self.pad == 'full': pad = self.filter_size[0] - 1 if not self.dimshuffle and self.untie_biases and self.b is not None: del self.params[self.b] biases_shape = (num_filters, self.output_shape[1], self.output_shape[2]) self.b = self.add_param(b, biases_shape, name="b", regularizable=False) self.filter_acts_op = FilterActs(stride=self.stride[0], partial_sum=self.partial_sum, pad=pad) @property def num_input_channels(self): if self.dimshuffle: return self.input_shape[1] else: return self.input_shape[0] def get_W_shape(self): if self.dimshuffle: return super(Conv2DCCLayer, self).get_W_shape() else: return ((self.num_input_channels,) + self.filter_size + (self.num_filters,)) def get_output_shape_for(self, input_shape): if not self.dimshuffle: # c01b to bc01 input_shape = (input_shape[3], input_shape[0], 
input_shape[1], input_shape[2]) shape = super(Conv2DCCLayer, self).get_output_shape_for(input_shape) if not self.dimshuffle: # bc01 to c01b shape = (shape[1], shape[2], shape[3], shape[0]) return shape def get_output_for(self, input, **kwargs): if self.dimshuffle: filters = self.W.dimshuffle(1, 2, 3, 0) # bc01 to c01b input = input.dimshuffle(1, 2, 3, 0) # bc01 to c01b else: filters = self.W if self.flip_filters: filters = filters[:, ::-1, ::-1, :] # flip top-down, left-right contiguous_filters = gpu_contiguous(filters) contiguous_input = gpu_contiguous(input) conved = self.filter_acts_op(contiguous_input, contiguous_filters) if self.stride != 1: # cuda-convnet calculates a non-standard strided output shape, # so we need to truncate the output in this case pad = self.pad if isinstance(self.pad, tuple) else (self.pad,) * 2 true_rows = conv_output_length(input.shape[1], self.filter_size[0], self.stride[0], pad[0]) true_columns = conv_output_length(input.shape[2], self.filter_size[1], self.stride[1], pad[1]) conved = conved[:, :true_rows, :true_columns, :] if self.b is not None: if self.untie_biases: biases = self.b.dimshuffle(0, 1, 2, 'x') # c01 to c01b else: biases = self.b.dimshuffle(0, 'x', 'x', 'x') # c to c01b conved += biases conved = self.nonlinearity(conved) if self.dimshuffle: return conved.dimshuffle(3, 0, 1, 2) # c01b to bc01 else: return conved class MaxPool2DCCLayer(Layer): """ 2D max-pooling layer Performs 2D max-pooling over the two trailing axes of a 4D input tensor (or over axis 1 and 2 if ``dimshuffle=False``, see notes). This is an alternative implementation which uses the cuda-convnet wrappers from pylearn2: ``pylearn2.sandbox.cuda_convnet.pool.MaxPool``. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_size : integer or iterable The length of the pooling region in each dimension. If an integer, it is promoted to a square pooling region. If an iterable, it should have two elements. This layer does not support non-square pooling regions. stride : integer, iterable or ``None`` The strides between sucessive pooling regions in each dimension. If ``None`` then ``stride = pool_size``. This layer does not support using different strides along both axes. pad : integer or iterable (default: 0) This implementation does not support custom padding, so this argument must always be set to ``0``. It exists only to make sure the interface is compatible with :class:`lasagne.layers.MaxPool2DLayer`. ignore_border : bool (default: False) This implementation always includes partial pooling regions, so this argument must always be set to False. It exists only to make sure the interface is compatible with :class:`lasagne.layers.MaxPool2DLayer`. dimshuffle : bool (default: True) If ``True``, the layer will automatically apply the necessary dimshuffle operations to deal with the fact that the cuda-convnet implementation uses c01b (batch-size-last) axis order instead of bc01 (batch-size-first), which is the Lasagne/Theano default. This makes the layer interoperable with other Lasagne layers. If ``False``, this automatic dimshuffling is disabled and the layer will expect its input and parameters to have c01b axis order. It is up to the user to ensure this. :class:`ShuffleBC01ToC01BLayer` and :class:`ShuffleC01BToBC01Layer` can be used to convert between bc01 and c01b axis order. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. 
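    Examples
    --------
    A sketch of the c01b workflow described in the `dimshuffle` parameter
    above, assuming GPU support, pylearn2 and the module import path
    ``lasagne.layers.cuda_convnet``; the shapes are illustrative only.

    >>> from lasagne.layers import InputLayer  # doctest: +SKIP
    >>> from lasagne.layers.cuda_convnet import MaxPool2DCCLayer  # doctest: +SKIP
    >>> from lasagne.layers.cuda_convnet import bc01_to_c01b, c01b_to_bc01  # doctest: +SKIP
    >>> l_in = InputLayer((128, 32, 30, 30))  # doctest: +SKIP
    >>> l_c01b = bc01_to_c01b(l_in)  # doctest: +SKIP
    >>> l_pool = MaxPool2DCCLayer(l_c01b, pool_size=2, dimshuffle=False)  # doctest: +SKIP
    >>> l_bc01 = c01b_to_bc01(l_pool)  # doctest: +SKIP
    >>> l_bc01.output_shape  # doctest: +SKIP
    (128, 32, 15, 15)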
Notes ----- The cuda-convnet max-pooling implementation has several limitations: * only square pooling regions are supported. * only identical strides in the horizontal and vertical direction are supported. * only square inputs are supported. (This limitation does not exist for the convolution implementation.) * partial pooling regions are always included (``ignore_border`` is forced to ``False``). * custom padding is not supported (``pad`` is forced to ``0``). * this layer only works on the GPU. The cuda-convnet pooling implementation uses c01b (batch-size-last) axis order by default. The Theano/Lasagne default is bc01 (batch-size-first). This layer automatically adds the necessary dimshuffle operations for the input and the parameters so that it is interoperable with other layers that assume bc01 axis order. However, these additional dimshuffle operations may sometimes negatively affect performance. For this reason, it is possible to disable them by setting ``dimshuffle=False``. In this case, the user is expected to manually ensure that the input and parameters have the correct axis order. :class:`ShuffleBC01ToC01BLayer` and :class:`ShuffleC01BToBC01Layer` can be used to convert between bc01 and c01b axis order. """ def __init__(self, incoming, pool_size, stride=None, ignore_border=False, dimshuffle=True, **kwargs): from pylearn2.sandbox.cuda_convnet.pool import MaxPool if 'pad' in kwargs: pad = kwargs.pop('pad') if as_tuple(pad, 2) != (0, 0): raise NotImplementedError("MaxPool2DCCLayer does not " "support padding") super(MaxPool2DCCLayer, self).__init__(incoming, **kwargs) pool_size = as_tuple(pool_size, 2) if pool_size[0] != pool_size[1]: raise NotImplementedError("MaxPool2DCCLayer only supports square " "pooling regions, but pool_size=(%d, %d)" % pool_size) self.pool_size = pool_size[0] if stride is None: self.stride = self.pool_size else: stride = as_tuple(stride, 2) if stride[0] != stride[1]: raise NotImplementedError("MaxPool2DCCLayer only supports " "using the same stride in both " "directions but stride=(%d, %d)" % stride) self.stride = stride[0] if self.stride > self.pool_size: raise NotImplementedError("MaxPool2DCCLayer only supports " "stride <= pool_size.") # The ignore_border argument is for compatibility with MaxPool2DLayer. # ignore_border=True is not supported. Borders are never ignored. 
if ignore_border: raise NotImplementedError("MaxPool2DCCLayer does not support " "ignore_border=True.") self.dimshuffle = dimshuffle self.pool_op = MaxPool(ds=self.pool_size, stride=self.stride) def get_output_shape_for(self, input_shape): if self.dimshuffle: batch_size = input_shape[0] num_input_channels = input_shape[1] input_rows, input_columns = input_shape[2:4] else: batch_size = input_shape[3] num_input_channels = input_shape[0] input_rows, input_columns = input_shape[1:3] output_rows = pool_output_length(input_rows, pool_size=self.pool_size, stride=self.stride, pad=0, ignore_border=False, ) output_columns = pool_output_length(input_columns, pool_size=self.pool_size, stride=self.stride, pad=0, ignore_border=False, ) if self.dimshuffle: return (batch_size, num_input_channels, output_rows, output_columns) else: return (num_input_channels, output_rows, output_columns, batch_size) def get_output_for(self, input, **kwargs): if self.dimshuffle: input = input.dimshuffle(1, 2, 3, 0) # bc01 to c01b contiguous_input = gpu_contiguous(input) pooled = self.pool_op(contiguous_input) if self.dimshuffle: return pooled.dimshuffle(3, 0, 1, 2) # c01b to bc01 else: return pooled # Helper classes for switching between bc01 and c01b input formats class ShuffleBC01ToC01BLayer(Layer): """ shuffle 4D input from bc01 (batch-size-first) order to c01b (batch-size-last) order. This layer can be used for interoperability between c01b and bc01 layers. For example, :class:`MaxPool2DCCLayer` and :class:`Conv2DCCLayer` operate in c01b mode when they are created with ``dimshuffle=False``. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. """ def get_output_shape_for(self, input_shape): return (input_shape[1], input_shape[2], input_shape[3], input_shape[0]) def get_output_for(self, input, **kwargs): return input.dimshuffle(1, 2, 3, 0) bc01_to_c01b = ShuffleBC01ToC01BLayer # shortcut class ShuffleC01BToBC01Layer(Layer): """ shuffle 4D input from c01b (batch-size-last) order to bc01 (batch-size-first) order. This layer can be used for interoperability between c01b and bc01 layers. For example, :class:`MaxPool2DCCLayer` and :class:`Conv2DCCLayer` operate in c01b mode when they are created with ``dimshuffle=False``. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. """ def get_output_shape_for(self, input_shape): return (input_shape[3], input_shape[0], input_shape[1], input_shape[2]) def get_output_for(self, input, **kwargs): return input.dimshuffle(3, 0, 1, 2) c01b_to_bc01 = ShuffleC01BToBC01Layer # shortcut # c01b versions of other Layer classes class NINLayer_c01b(Layer): """ lasagne.layers.NINLayer_c01b(incoming, num_units, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs) Network-in-network layer with c01b axis ordering. This is a c01b version of :class:`lasagne.layers.NINLayer`. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape num_units : int The number of units of the layer untie_biases : bool If ``False``, the network has a single bias vector similar to a dense layer. 
If ``True``, a separate bias vector is used for each spatial position. W : Theano shared variable, numpy array or callable An initializer for the weights of the layer. If a shared variable or a numpy array is provided the shape should be (num_units, num_input_channels). See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, numpy array, callable or None An initializer for the biases of the layer. If a shared variable or a numpy array is provided the correct shape is determined by the untie_biases setting. If untie_biases is ``False``, then the shape should be ``(num_units,)``. If untie_biases is ``True`` then the shape should be ``(num_units, rows, columns)``. If ``None`` is provided the layer will have no biases. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. """ def __init__(self, incoming, num_units, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, **kwargs): super(NINLayer_c01b, self).__init__(incoming, **kwargs) if nonlinearity is None: self.nonlinearity = nonlinearities.identity else: self.nonlinearity = nonlinearity self.num_units = num_units self.untie_biases = untie_biases num_input_channels = self.input_shape[0] self.W = self.add_param(W, (num_units, num_input_channels), name="W") if b is None: self.b = None else: if self.untie_biases: biases_shape = (num_units,) + self.output_shape[1:-1] else: biases_shape = (num_units,) self.b = self.add_param(b, biases_shape, name="b", regularizable=False) def get_output_shape_for(self, input_shape): return (self.num_units,) + input_shape[1:] def get_output_for(self, input, **kwargs): # fc * c01b... = f01b... out = T.tensordot(self.W, input, axes=[[1], [0]]) if self.b is None: activation = out else: if self.untie_biases: bias_axes = range(input.ndim - 1) + ['x'] else: bias_axes = [0] + (['x'] * (input.ndim - 1)) b_shuffled = self.b.dimshuffle(bias_axes) activation = out + b_shuffled return self.nonlinearity(activation) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/input.py0000644000175000017500000000477513307306052025516 0ustar sinclairssinclairsfrom collections import OrderedDict import theano import theano.tensor as T from .. import utils from .base import Layer __all__ = [ "InputLayer", ] class InputLayer(Layer): """ This layer holds a symbolic variable that represents a network input. A variable can be specified when the layer is instantiated, else it is created. Parameters ---------- shape : tuple of `int` or `None` elements The shape of the input. Any element can be `None` to indicate that the size of that dimension is not fixed at compile time. input_var : Theano symbolic variable or `None` (default: `None`) A variable representing a network input. If it is not provided, a variable will be created. Raises ------ ValueError If the dimension of `input_var` is not equal to `len(shape)` Notes ----- The first dimension usually indicates the batch size. If you specify it, Theano may apply more optimizations while compiling the training or prediction function, but the compiled function will not accept data of a different batch size at runtime. To compile for a variable batch size, set the first shape element to `None` instead. 
Examples -------- >>> from lasagne.layers import InputLayer >>> l_in = InputLayer((100, 20)) """ def __init__(self, shape, input_var=None, name=None, **kwargs): self.shape = tuple(shape) if any(d is not None and d <= 0 for d in self.shape): raise ValueError(( "Cannot create InputLayer with a non-positive shape " "dimension. shape=%r, self.name=%r") % ( self.shape, name)) ndim = len(self.shape) if input_var is None: # create the right TensorType for the given dimensionality/shape input_var_type = T.TensorType(theano.config.floatX, [s == 1 for s in self.shape]) var_name = ("%s.input" % name) if name is not None else "input" input_var = input_var_type(var_name) else: # ensure the given variable has the correct dimensionality if input_var.ndim != ndim: raise ValueError("shape has %d dimensions, but variable has " "%d" % (ndim, input_var.ndim)) self.input_var = input_var self.name = name self.params = OrderedDict() @Layer.output_shape.getter def output_shape(self): return self.shape Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/merge.py0000644000175000017500000003544713307306052025456 0ustar sinclairssinclairsimport theano.tensor as T from .base import MergeLayer __all__ = [ "autocrop", "autocrop_array_shapes", "ConcatLayer", "concat", "ElemwiseMergeLayer", "ElemwiseSumLayer", ] def autocrop(inputs, cropping): """ Crops the given input arrays. Cropping takes a sequence of inputs and crops them per-axis in order to ensure that their sizes are consistent so that they can be combined in an element-wise fashion. If cropping is enabled for a specific axis, the minimum size in that axis of all inputs is computed, and all inputs are cropped to that size. The per-axis cropping modes are: `None`: this axis is not cropped, inputs are unchanged in this axis `'lower'`: inputs are cropped choosing the lower portion in this axis (`a[:crop_size, ...]`) `'upper'`: inputs are cropped choosing the upper portion in this axis (`a[-crop_size:, ...]`) `'center'`: inputs are cropped choosing the central portion in this axis (``a[offset:offset+crop_size, ...]`` where ``offset = (a.shape[0]-crop_size)//2)`` Parameters ---------- inputs : list of Theano expressions The input arrays in the form of a list of Theano expressions cropping : list of cropping modes Cropping modes, one for each axis. If length of `cropping` is less than the number of axes in the inputs, it is padded with `None`. If `cropping` is None, `input` is returned as is. 
Returns ------- list of Theano expressions each expression is the cropped version of the corresponding input Example ------- For example, given three inputs: >>> import numpy >>> import theano >>> a = numpy.random.random((1, 2, 3, 4)) >>> b = numpy.random.random((5, 4, 4, 2)) >>> c = numpy.random.random((7, 1, 8, 9)) Cropping mode for each axis: >>> cropping = [None, 'lower', 'center', 'upper'] Crop (note that the input arrays are converted to Theano vars first, and that the results are converted back from Theano expressions to numpy arrays by calling `eval()`) >>> xa, xb, xc = autocrop([theano.shared(a), \ theano.shared(b), \ theano.shared(c)], cropping) >>> xa, xb, xc = xa.eval(), xb.eval(), xc.eval() They will be left as is in axis 0 and cropped in the other three, choosing the lower, center and upper portions: Axis 0: choose all, axis 1: lower 1 element, axis 2: central 3 (all) and axis 3: upper 2 >>> (xa == a[:, :1, :3, -2:]).all() True Axis 0: choose all, axis 1: lower 1 element, axis 2: central 3 starting at 0 and axis 3: upper 2 (all) >>> (xb == b[:, :1, :3, -2:]).all() True Axis 0: all, axis 1: lower 1 element (all), axis 2: central 3 starting at 2 and axis 3: upper 2 >>> (xc == c[:, :1, 2:5:, -2:]).all() True """ if cropping is None: # No cropping in any dimension return inputs else: # Get the number of dimensions ndim = inputs[0].ndim # Check for consistent number of dimensions if not all(input.ndim == ndim for input in inputs): raise ValueError("Not all inputs are of the same " "dimensionality. Got {0} inputs of " "dimensionalities {1}.".format( len(inputs), [input.ndim for input in inputs])) # Get the shape of each input, where each shape will be a Theano # expression shapes = [input.shape for input in inputs] # Convert the shapes to a matrix expression shapes_tensor = T.as_tensor_variable(shapes) # Min along axis 0 to get the minimum size in each dimension min_shape = T.min(shapes_tensor, axis=0) # Nested list of slices; each list in `slices` corresponds to # an input and contains a slice for each dimension slices_by_input = [[] for i in range(len(inputs))] # If there are more dimensions than cropping entries, pad # the cropping cropping = list(cropping) if ndim > len(cropping): cropping = list(cropping) + \ [None] * (ndim - len(cropping)) # For each dimension for dim, cr in enumerate(cropping): if cr is None: # Don't crop this dimension slice_all = slice(None) for slices in slices_by_input: slices.append(slice_all) else: # We crop all inputs in the dimension `dim` so that they # are the minimum found in this dimension from all inputs sz = min_shape[dim] if cr == 'lower': # Choose the first `sz` elements slc_lower = slice(None, sz) for slices in slices_by_input: slices.append(slc_lower) elif cr == 'upper': # Choose the last `sz` elements slc_upper = slice(-sz, None) for slices in slices_by_input: slices.append(slc_upper) elif cr == 'center': # Choose `sz` elements from the center for sh, slices in zip(shapes, slices_by_input): offset = (sh[dim] - sz) // 2 slices.append(slice(offset, offset+sz)) else: raise ValueError( 'Unknown crop mode \'{0}\''.format(cr)) return [input[slices] for input, slices in zip(inputs, slices_by_input)] def autocrop_array_shapes(input_shapes, cropping): """ Computes the shapes of the given arrays after auto-cropping is applied. For more information on cropping, see the :func:`autocrop` function documentation. 
Parameters ---------- input_shapes : the shapes of input arrays prior to cropping in the form of a list of tuples cropping : a list of cropping modes, one for each axis. If length of `cropping` is less than the number of axes in the inputs, it is padded with `None`. If `cropping` is None, `input_shapes` is returned as is. For more information on their values and operation, see the :func:`autocrop` documentation. Returns ------- list of tuples each tuple is a cropped version of the corresponding input shape tuple in `input_shapes` For example, given three input shapes with 4 axes each: >>> a = (1, 2, 3, 4) >>> b = (5, 4, 4, 2) >>> c = (7, 1, 8, 9) Cropping mode for each axis: >>> cropping = [None, 'lower', 'center', 'upper'] Apply: >>> cropped_shapes = autocrop_array_shapes([a, b, c], cropping) >>> cropped_shapes[0] (1, 1, 3, 2) >>> cropped_shapes[1] (5, 1, 3, 2) >>> cropped_shapes[2] (7, 1, 3, 2) Note that axis 0 remains unchanged, where all the others are cropped to the minimum size in that axis. """ if cropping is None: return input_shapes else: # Check for consistent number of dimensions ndim = len(input_shapes[0]) if not all(len(sh) == ndim for sh in input_shapes): raise ValueError("Not all inputs are of the same " "dimensionality. Got {0} inputs of " "dimensionalities {1}.".format( len(input_shapes), [len(sh) for sh in input_shapes])) result = [] # If there are more dimensions than cropping entries, pad # the cropping cropping = list(cropping) if ndim > len(cropping): cropping = list(cropping) + \ [None] * (ndim - len(cropping)) for sh, cr in zip(zip(*input_shapes), cropping): if cr is None: result.append(sh) elif cr in {'lower', 'center', 'upper'}: min_sh = None if any(x is None for x in sh) else min(sh) result.append([min_sh] * len(sh)) else: raise ValueError('Unknown crop mode \'{0}\''.format(cr)) return [tuple(sh) for sh in zip(*result)] class ConcatLayer(MergeLayer): """ Concatenates multiple inputs along the specified axis. Inputs should have the same shape except for the dimension specified in axis, which can have different sizes. Parameters ----------- incomings : a list of :class:`Layer` instances or tuples The layers feeding into this layer, or expected input shapes axis : int Axis which inputs are joined over cropping : None or [crop] Cropping for each input axis. Cropping is described in the docstring for :func:`autocrop`. Cropping is always disabled for `axis`. 
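    Examples
    --------
    A small shape sketch (sizes chosen only for illustration):

    >>> from lasagne.layers import InputLayer, ConcatLayer
    >>> l_in1 = InputLayer((None, 10))
    >>> l_in2 = InputLayer((None, 20))
    >>> l_concat = ConcatLayer([l_in1, l_in2], axis=1)
    >>> l_concat.output_shape
    (None, 30)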
""" def __init__(self, incomings, axis=1, cropping=None, **kwargs): super(ConcatLayer, self).__init__(incomings, **kwargs) self.axis = axis if cropping is not None: # If cropping is enabled, don't crop on the selected axis cropping = list(cropping) cropping[axis] = None self.cropping = cropping def get_output_shape_for(self, input_shapes): input_shapes = autocrop_array_shapes(input_shapes, self.cropping) # Infer the output shape by grabbing, for each axis, the first # input size that is not `None` (if there is any) output_shape = [next((s for s in sizes if s is not None), None) for sizes in zip(*input_shapes)] def match(shape1, shape2): axis = self.axis if self.axis >= 0 else len(shape1) + self.axis return (len(shape1) == len(shape2) and all(i == axis or s1 is None or s2 is None or s1 == s2 for i, (s1, s2) in enumerate(zip(shape1, shape2)))) # Check for compatibility with inferred output shape if not all(match(shape, output_shape) for shape in input_shapes): raise ValueError("Mismatch: input shapes must be the same except " "in the concatenation axis") # Infer output shape on concatenation axis and return sizes = [input_shape[self.axis] for input_shape in input_shapes] concat_size = None if any(s is None for s in sizes) else sum(sizes) output_shape[self.axis] = concat_size return tuple(output_shape) def get_output_for(self, inputs, **kwargs): inputs = autocrop(inputs, self.cropping) return T.concatenate(inputs, axis=self.axis) concat = ConcatLayer # shortcut class ElemwiseMergeLayer(MergeLayer): """ This layer performs an elementwise merge of its input layers. It requires all input layers to have the same output shape. Parameters ---------- incomings : a list of :class:`Layer` instances or tuples the layers feeding into this layer, or expected input shapes, with all incoming shapes being equal merge_function : callable the merge function to use. Should take two arguments and return the updated value. Some possible merge functions are ``theano.tensor``: ``mul``, ``add``, ``maximum`` and ``minimum``. cropping : None or [crop] Cropping for each input axis. Cropping is described in the docstring for :func:`autocrop` See Also -------- ElemwiseSumLayer : Shortcut for sum layer. """ def __init__(self, incomings, merge_function, cropping=None, **kwargs): super(ElemwiseMergeLayer, self).__init__(incomings, **kwargs) self.merge_function = merge_function self.cropping = cropping def get_output_shape_for(self, input_shapes): input_shapes = autocrop_array_shapes(input_shapes, self.cropping) # Infer the output shape by grabbing, for each axis, the first # input size that is not `None` (if there is any) output_shape = tuple(next((s for s in sizes if s is not None), None) for sizes in zip(*input_shapes)) def match(shape1, shape2): return (len(shape1) == len(shape2) and all(s1 is None or s2 is None or s1 == s2 for s1, s2 in zip(shape1, shape2))) # Check for compatibility with inferred output shape if not all(match(shape, output_shape) for shape in input_shapes): raise ValueError("Mismatch: not all input shapes are the same") return output_shape def get_output_for(self, inputs, **kwargs): inputs = autocrop(inputs, self.cropping) output = None for input in inputs: if output is not None: output = self.merge_function(output, input) else: output = input return output class ElemwiseSumLayer(ElemwiseMergeLayer): """ This layer performs an elementwise sum of its input layers. It requires all input layers to have the same output shape. 
Parameters ---------- incomings : a list of :class:`Layer` instances or tuples the layers feeding into this layer, or expected input shapes, with all incoming shapes being equal coeffs: list or scalar A same-sized list of coefficients, or a single coefficient that is to be applied to all instances. By default, these will not be included in the learnable parameters of this layer. cropping : None or [crop] Cropping for each input axis. Cropping is described in the docstring for :func:`autocrop` Notes ----- Depending on your architecture, this can be used to avoid the more costly :class:`ConcatLayer`. For example, instead of concatenating layers before a :class:`DenseLayer`, insert separate :class:`DenseLayer` instances of the same number of output units and add them up afterwards. (This avoids the copy operations in concatenation, but splits up the dot product.) """ def __init__(self, incomings, coeffs=1, cropping=None, **kwargs): super(ElemwiseSumLayer, self).__init__(incomings, T.add, cropping=cropping, **kwargs) if isinstance(coeffs, list): if len(coeffs) != len(incomings): raise ValueError("Mismatch: got %d coeffs for %d incomings" % (len(coeffs), len(incomings))) else: coeffs = [coeffs] * len(incomings) self.coeffs = coeffs def get_output_for(self, inputs, **kwargs): # if needed multiply each input by its coefficient inputs = [input * coeff if coeff != 1 else input for coeff, input in zip(self.coeffs, inputs)] # pass scaled inputs to the super class for summing return super(ElemwiseSumLayer, self).get_output_for(inputs, **kwargs) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/pool.py0000644000175000017500000012006513307306052025317 0ustar sinclairssinclairsimport theano.tensor as T from .base import Layer from ..utils import as_tuple __all__ = [ "MaxPool1DLayer", "MaxPool2DLayer", "MaxPool3DLayer", "Pool1DLayer", "Pool2DLayer", "Pool3DLayer", "Upscale1DLayer", "Upscale2DLayer", "Upscale3DLayer", "FeaturePoolLayer", "FeatureWTALayer", "GlobalPoolLayer", "SpatialPyramidPoolingLayer", ] def pool_output_length(input_length, pool_size, stride, pad, ignore_border): """ Compute the output length of a pooling operator along a single dimension. Parameters ---------- input_length : integer The length of the input in the pooling dimension pool_size : integer The length of the pooling region stride : integer The stride between successive pooling regions pad : integer The number of elements to be added to the input on each side. ignore_border: bool If ``True``, partial pooling regions will be ignored. Must be ``True`` if ``pad != 0``. Returns ------- output_length * None if either input is None. * Computed length of the pooling operator otherwise. Notes ----- When ``ignore_border == True``, this is given by the number of full pooling regions that fit in the padded input length, divided by the stride (rounding down). If ``ignore_border == False``, a single partial pooling region is appended if at least one input element would be left uncovered otherwise. 
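    Examples
    --------
    A small numeric sketch of the two modes (values chosen for
    illustration): with ``ignore_border=True`` the last, partial pooling
    region of a length-10 input is dropped, with ``ignore_border=False``
    it is kept.

    >>> pool_output_length(10, pool_size=3, stride=3, pad=0,
    ...                    ignore_border=True)
    3
    >>> pool_output_length(10, pool_size=3, stride=3, pad=0,
    ...                    ignore_border=False)
    4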
""" if input_length is None or pool_size is None: return None if ignore_border: output_length = input_length + 2 * pad - pool_size + 1 output_length = (output_length + stride - 1) // stride # output length calculation taken from: # https://github.com/Theano/Theano/blob/master/theano/tensor/signal/downsample.py else: assert pad == 0 if stride >= pool_size: output_length = (input_length + stride - 1) // stride else: output_length = max( 0, (input_length - pool_size + stride - 1) // stride) + 1 return output_length def pool_2d(input, **kwargs): """ Wrapper function that calls :func:`theano.tensor.signal.pool_2d` either with the new or old keyword argument names expected by Theano. """ try: return T.signal.pool.pool_2d(input, **kwargs) except TypeError: # pragma: no cover # convert from new to old interface kwargs['ds'] = kwargs.pop('ws') kwargs['st'] = kwargs.pop('stride') kwargs['padding'] = kwargs.pop('pad') return T.signal.pool.pool_2d(input, **kwargs) def pool_3d(input, **kwargs): # pragma: no cover """ Wrapper function that calls :func:`theano.tensor.signal.pool_3d` either with the new or old keyword argument names expected by Theano. """ try: return T.signal.pool.pool_3d(input, **kwargs) except TypeError: # pragma: no cover # convert from new to old interface kwargs['ds'] = kwargs.pop('ws') kwargs['st'] = kwargs.pop('stride') kwargs['padding'] = kwargs.pop('pad') return T.signal.pool.pool_3d(input, **kwargs) class Pool1DLayer(Layer): """ 1D pooling layer Performs 1D mean or max-pooling over the trailing axis of a 3D input tensor. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_size : integer or iterable The length of the pooling region. If an iterable, it should have a single element. stride : integer, iterable or ``None`` The stride between sucessive pooling regions. If ``None`` then ``stride == pool_size``. pad : integer or iterable The number of elements to be added to the input on each side. Must be less than stride. ignore_border : bool If ``True``, partial pooling regions will be ignored. Must be ``True`` if ``pad != 0``. mode : {'max', 'average_inc_pad', 'average_exc_pad'} Pooling mode: max-pooling or mean-pooling including/excluding zeros from partially padded pooling regions. Default is 'max'. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. See Also -------- MaxPool1DLayer : Shortcut for max pooling layer. Notes ----- The value used to pad the input is chosen to be less than the minimum of the input, so that the output of each pooling region always corresponds to some element in the unpadded input region. Using ``ignore_border=False`` prevents Theano from using cuDNN for the operation, so it will fall back to a slower implementation. """ def __init__(self, incoming, pool_size, stride=None, pad=0, ignore_border=True, mode='max', **kwargs): super(Pool1DLayer, self).__init__(incoming, **kwargs) if len(self.input_shape) != 3: raise ValueError("Tried to create a 1D pooling layer with " "input shape %r. Expected 3 input dimensions " "(batchsize, channels, 1 spatial dimensions)." 
% (self.input_shape,)) self.pool_size = as_tuple(pool_size, 1) self.stride = self.pool_size if stride is None else as_tuple(stride, 1) self.pad = as_tuple(pad, 1) self.ignore_border = ignore_border self.mode = mode def get_output_shape_for(self, input_shape): output_shape = list(input_shape) # copy / convert to mutable list output_shape[-1] = pool_output_length(input_shape[-1], pool_size=self.pool_size[0], stride=self.stride[0], pad=self.pad[0], ignore_border=self.ignore_border, ) return tuple(output_shape) def get_output_for(self, input, **kwargs): input_4d = T.shape_padright(input, 1) pooled = pool_2d(input_4d, ws=(self.pool_size[0], 1), stride=(self.stride[0], 1), ignore_border=self.ignore_border, pad=(self.pad[0], 0), mode=self.mode, ) return pooled[:, :, :, 0] class Pool2DLayer(Layer): """ 2D pooling layer Performs 2D mean or max-pooling over the two trailing axes of a 4D input tensor. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_size : integer or iterable The length of the pooling region in each dimension. If an integer, it is promoted to a square pooling region. If an iterable, it should have two elements. stride : integer, iterable or ``None`` The strides between sucessive pooling regions in each dimension. If ``None`` then ``stride = pool_size``. pad : integer or iterable Number of elements to be added on each side of the input in each dimension. Each value must be less than the corresponding stride. ignore_border : bool If ``True``, partial pooling regions will be ignored. Must be ``True`` if ``pad != (0, 0)``. mode : {'max', 'average_inc_pad', 'average_exc_pad'} Pooling mode: max-pooling or mean-pooling including/excluding zeros from partially padded pooling regions. Default is 'max'. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. See Also -------- MaxPool2DLayer : Shortcut for max pooling layer. Notes ----- The value used to pad the input is chosen to be less than the minimum of the input, so that the output of each pooling region always corresponds to some element in the unpadded input region. Using ``ignore_border=False`` prevents Theano from using cuDNN for the operation, so it will fall back to a slower implementation. """ def __init__(self, incoming, pool_size, stride=None, pad=(0, 0), ignore_border=True, mode='max', **kwargs): super(Pool2DLayer, self).__init__(incoming, **kwargs) self.pool_size = as_tuple(pool_size, 2) if len(self.input_shape) != 4: raise ValueError("Tried to create a 2D pooling layer with " "input shape %r. Expected 4 input dimensions " "(batchsize, channels, 2 spatial dimensions)." 
% (self.input_shape,)) if stride is None: self.stride = self.pool_size else: self.stride = as_tuple(stride, 2) self.pad = as_tuple(pad, 2) self.ignore_border = ignore_border self.mode = mode def get_output_shape_for(self, input_shape): output_shape = list(input_shape) # copy / convert to mutable list output_shape[2] = pool_output_length(input_shape[2], pool_size=self.pool_size[0], stride=self.stride[0], pad=self.pad[0], ignore_border=self.ignore_border, ) output_shape[3] = pool_output_length(input_shape[3], pool_size=self.pool_size[1], stride=self.stride[1], pad=self.pad[1], ignore_border=self.ignore_border, ) return tuple(output_shape) def get_output_for(self, input, **kwargs): pooled = pool_2d(input, ws=self.pool_size, stride=self.stride, ignore_border=self.ignore_border, pad=self.pad, mode=self.mode, ) return pooled class Pool3DLayer(Layer): # pragma: no cover """ 3D pooling layer Performs 3D mean or max-pooling over the three trailing axes of a 5D input tensor. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_size : integer or iterable The length of the pooling region in each dimension. If an integer, it is promoted to a cubic pooling region. If an iterable, it should have three elements. stride : integer, iterable or ``None`` The strides between sucessive pooling regions in each dimension. If ``None`` then ``stride = pool_size``. pad : integer or iterable Number of elements to be added on each side of the input in each dimension. Each value must be less than the corresponding stride. ignore_border : bool If ``True``, partial pooling regions will be ignored. Must be ``True`` if ``pad != (0, 0, 0)``. mode : {'max', 'average_inc_pad', 'average_exc_pad'} Pooling mode: max-pooling or mean-pooling including/excluding zeros from partially padded pooling regions. Default is 'max'. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. See Also -------- MaxPool3DLayer : Shortcut for max pooling layer. Notes ----- The value used to pad the input is chosen to be less than the minimum of the input, so that the output of each pooling region always corresponds to some element in the unpadded input region. Using ``ignore_border=False`` prevents Theano from using cuDNN for the operation, so it will fall back to a slower implementation. """ def __init__(self, incoming, pool_size, stride=None, pad=(0, 0, 0), ignore_border=True, mode='max', **kwargs): super(Pool3DLayer, self).__init__(incoming, **kwargs) self.pool_size = as_tuple(pool_size, 3) if len(self.input_shape) != 5: raise ValueError("Tried to create a 3D pooling layer with " "input shape %r. Expected 5 input dimensions " "(batchsize, channels, 3 spatial dim)." 
% (self.input_shape,)) if stride is None: self.stride = self.pool_size else: self.stride = as_tuple(stride, 3) self.pad = as_tuple(pad, 3) self.ignore_border = ignore_border self.mode = mode def get_output_shape_for(self, input_shape): output_shape = list(input_shape) # copy / convert to mutable list output_shape[2] = pool_output_length(input_shape[2], pool_size=self.pool_size[0], stride=self.stride[0], pad=self.pad[0], ignore_border=self.ignore_border, ) output_shape[3] = pool_output_length(input_shape[3], pool_size=self.pool_size[1], stride=self.stride[1], pad=self.pad[1], ignore_border=self.ignore_border, ) output_shape[4] = pool_output_length(input_shape[4], pool_size=self.pool_size[2], stride=self.stride[2], pad=self.pad[2], ignore_border=self.ignore_border, ) return tuple(output_shape) def get_output_for(self, input, **kwargs): pooled = pool_3d(input, ws=self.pool_size, stride=self.stride, ignore_border=self.ignore_border, pad=self.pad, mode=self.mode, ) return pooled class MaxPool1DLayer(Pool1DLayer): """ 1D max-pooling layer Performs 1D max-pooling over the trailing axis of a 3D input tensor. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_size : integer or iterable The length of the pooling region. If an iterable, it should have a single element. stride : integer, iterable or ``None`` The stride between sucessive pooling regions. If ``None`` then ``stride == pool_size``. pad : integer or iterable The number of elements to be added to the input on each side. Must be less than stride. ignore_border : bool If ``True``, partial pooling regions will be ignored. Must be ``True`` if ``pad != 0``. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. Notes ----- The value used to pad the input is chosen to be less than the minimum of the input, so that the output of each pooling region always corresponds to some element in the unpadded input region. Using ``ignore_border=False`` prevents Theano from using cuDNN for the operation, so it will fall back to a slower implementation. """ def __init__(self, incoming, pool_size, stride=None, pad=0, ignore_border=True, **kwargs): super(MaxPool1DLayer, self).__init__(incoming, pool_size, stride, pad, ignore_border, mode='max', **kwargs) class MaxPool2DLayer(Pool2DLayer): """ 2D max-pooling layer Performs 2D max-pooling over the two trailing axes of a 4D input tensor. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_size : integer or iterable The length of the pooling region in each dimension. If an integer, it is promoted to a square pooling region. If an iterable, it should have two elements. stride : integer, iterable or ``None`` The strides between sucessive pooling regions in each dimension. If ``None`` then ``stride = pool_size``. pad : integer or iterable Number of elements to be added on each side of the input in each dimension. Each value must be less than the corresponding stride. ignore_border : bool If ``True``, partial pooling regions will be ignored. Must be ``True`` if ``pad != (0, 0)``. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. Notes ----- The value used to pad the input is chosen to be less than the minimum of the input, so that the output of each pooling region always corresponds to some element in the unpadded input region. 
Using ``ignore_border=False`` prevents Theano from using cuDNN for the operation, so it will fall back to a slower implementation. """ def __init__(self, incoming, pool_size, stride=None, pad=(0, 0), ignore_border=True, **kwargs): super(MaxPool2DLayer, self).__init__(incoming, pool_size, stride, pad, ignore_border, mode='max', **kwargs) # TODO: add reshape-based implementation to MaxPool*DLayer class MaxPool3DLayer(Pool3DLayer): # pragma: no cover """ 3D max-pooling layer Performs 3D max-pooling over the three trailing axes of a 5D input tensor. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_size : integer or iterable The length of the pooling region in each dimension. If an integer, it is promoted to a cubic pooling region. If an iterable, it should have three elements. stride : integer, iterable or ``None`` The strides between sucessive pooling regions in each dimension. If ``None`` then ``stride = pool_size``. pad : integer or iterable Number of elements to be added on each side of the input in each dimension. Each value must be less than the corresponding stride. ignore_border : bool If ``True``, partial pooling regions will be ignored. Must be ``True`` if ``pad != (0, 0, 0)``. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. Notes ----- The value used to pad the input is chosen to be less than the minimum of the input, so that the output of each pooling region always corresponds to some element in the unpadded input region. Using ``ignore_border=False`` prevents Theano from using cuDNN for the operation, so it will fall back to a slower implementation. """ def __init__(self, incoming, pool_size, stride=None, pad=(0, 0, 0), ignore_border=True, **kwargs): super(MaxPool3DLayer, self).__init__(incoming, pool_size, stride, pad, ignore_border, mode='max', **kwargs) if not hasattr(T.signal.pool, 'pool_3d'): # pragma: no cover # Hide Pool3DLayer/MaxPool3DLayer for old Theano versions del Pool3DLayer, MaxPool3DLayer __all__.remove('Pool3DLayer') __all__.remove('MaxPool3DLayer') class Upscale1DLayer(Layer): """ 1D upscaling layer Performs 1D upscaling over the trailing axis of a 3D input tensor. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. scale_factor : integer or iterable The scale factor. If an iterable, it should have one element. mode : {'repeat', 'dilate'} Upscaling mode: repeat element values or upscale leaving zeroes between upscaled elements. Default is 'repeat'. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. 
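    Examples
    --------
    A brief shape sketch (sizes are illustrative only):

    >>> from lasagne.layers import InputLayer, Upscale1DLayer
    >>> l_in = InputLayer((None, 16, 30))
    >>> l_up = Upscale1DLayer(l_in, scale_factor=2)
    >>> l_up.output_shape
    (None, 16, 60)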
""" def __init__(self, incoming, scale_factor, mode='repeat', **kwargs): super(Upscale1DLayer, self).__init__(incoming, **kwargs) self.scale_factor = as_tuple(scale_factor, 1) if self.scale_factor[0] < 1: raise ValueError('Scale factor must be >= 1, not {0}'.format( self.scale_factor)) if mode not in {'repeat', 'dilate'}: msg = "Mode must be either 'repeat' or 'dilate', not {0}" raise ValueError(msg.format(mode)) self.mode = mode def get_output_shape_for(self, input_shape): output_shape = list(input_shape) # copy / convert to mutable list if output_shape[2] is not None: output_shape[2] *= self.scale_factor[0] return tuple(output_shape) def get_output_for(self, input, **kwargs): a, = self.scale_factor upscaled = input if self.mode == 'repeat': if a > 1: upscaled = T.extra_ops.repeat(upscaled, a, 2) elif self.mode == 'dilate': if a > 1: output_shape = self.get_output_shape_for(input.shape) upscaled = T.zeros(shape=output_shape, dtype=input.dtype) upscaled = T.set_subtensor(upscaled[:, :, ::a], input) return upscaled class Upscale2DLayer(Layer): """ 2D upscaling layer Performs 2D upscaling over the two trailing axes of a 4D input tensor. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. scale_factor : integer or iterable The scale factor in each dimension. If an integer, it is promoted to a square scale factor region. If an iterable, it should have two elements. mode : {'repeat', 'dilate'} Upscaling mode: repeat element values or upscale leaving zeroes between upscaled elements. Default is 'repeat'. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. Notes ----- Using ``mode='dilate'`` followed by a convolution can be realized more efficiently with a transposed convolution, see :class:`lasagne.layers.TransposedConv2DLayer`. """ def __init__(self, incoming, scale_factor, mode='repeat', **kwargs): super(Upscale2DLayer, self).__init__(incoming, **kwargs) self.scale_factor = as_tuple(scale_factor, 2) if self.scale_factor[0] < 1 or self.scale_factor[1] < 1: raise ValueError('Scale factor must be >= 1, not {0}'.format( self.scale_factor)) if mode not in {'repeat', 'dilate'}: msg = "Mode must be either 'repeat' or 'dilate', not {0}" raise ValueError(msg.format(mode)) self.mode = mode def get_output_shape_for(self, input_shape): output_shape = list(input_shape) # copy / convert to mutable list if output_shape[2] is not None: output_shape[2] *= self.scale_factor[0] if output_shape[3] is not None: output_shape[3] *= self.scale_factor[1] return tuple(output_shape) def get_output_for(self, input, **kwargs): a, b = self.scale_factor upscaled = input if self.mode == 'repeat': if b > 1: upscaled = T.extra_ops.repeat(upscaled, b, 3) if a > 1: upscaled = T.extra_ops.repeat(upscaled, a, 2) elif self.mode == 'dilate': if b > 1 or a > 1: output_shape = self.get_output_shape_for(input.shape) upscaled = T.zeros(shape=output_shape, dtype=input.dtype) upscaled = T.set_subtensor(upscaled[:, :, ::a, ::b], input) return upscaled class Upscale3DLayer(Layer): """ 3D upscaling layer Performs 3D upscaling over the three trailing axes of a 5D input tensor. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. scale_factor : integer or iterable The scale factor in each dimension. If an integer, it is promoted to a cubic scale factor region. If an iterable, it should have three elements. 
mode : {'repeat', 'dilate'} Upscaling mode: repeat element values or upscale leaving zeroes between upscaled elements. Default is 'repeat'. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. """ def __init__(self, incoming, scale_factor, mode='repeat', **kwargs): super(Upscale3DLayer, self).__init__(incoming, **kwargs) self.scale_factor = as_tuple(scale_factor, 3) if self.scale_factor[0] < 1 or self.scale_factor[1] < 1 or \ self.scale_factor[2] < 1: raise ValueError('Scale factor must be >= 1, not {0}'.format( self.scale_factor)) if mode not in {'repeat', 'dilate'}: msg = "Mode must be either 'repeat' or 'dilate', not {0}" raise ValueError(msg.format(mode)) self.mode = mode def get_output_shape_for(self, input_shape): output_shape = list(input_shape) # copy / convert to mutable list if output_shape[2] is not None: output_shape[2] *= self.scale_factor[0] if output_shape[3] is not None: output_shape[3] *= self.scale_factor[1] if output_shape[4] is not None: output_shape[4] *= self.scale_factor[2] return tuple(output_shape) def get_output_for(self, input, **kwargs): a, b, c = self.scale_factor upscaled = input if self.mode == 'repeat': if c > 1: upscaled = T.extra_ops.repeat(upscaled, c, 4) if b > 1: upscaled = T.extra_ops.repeat(upscaled, b, 3) if a > 1: upscaled = T.extra_ops.repeat(upscaled, a, 2) elif self.mode == 'dilate': if c > 1 or b > 1 or a > 1: output_shape = self.get_output_shape_for(input.shape) upscaled = T.zeros(shape=output_shape, dtype=input.dtype) upscaled = T.set_subtensor( upscaled[:, :, ::a, ::b, ::c], input) return upscaled class FeaturePoolLayer(Layer): """ lasagne.layers.FeaturePoolLayer(incoming, pool_size, axis=1, pool_function=theano.tensor.max, **kwargs) Feature pooling layer This layer pools across a given axis of the input. By default this is axis 1, which corresponds to the feature axis for :class:`DenseLayer`, :class:`Conv1DLayer` and :class:`Conv2DLayer`. The layer can be used to implement maxout. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_size : integer the size of the pooling regions, i.e. the number of features / feature maps to be pooled together. axis : integer the axis along which to pool. The default value of ``1`` works for :class:`DenseLayer`, :class:`Conv1DLayer` and :class:`Conv2DLayer`. pool_function : callable the pooling function to use. This defaults to `theano.tensor.max` (i.e. max-pooling) and can be replaced by any other aggregation function. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. Notes ----- This layer requires that the size of the axis along which it pools is a multiple of the pool size. 
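    Examples
    --------
    A minimal maxout sketch (unit counts chosen for illustration): a linear
    :class:`DenseLayer` followed by feature pooling over groups of two
    halves the feature axis.

    >>> from lasagne.layers import InputLayer, DenseLayer, FeaturePoolLayer
    >>> l_in = InputLayer((None, 100))
    >>> l_dense = DenseLayer(l_in, num_units=200, nonlinearity=None)
    >>> l_maxout = FeaturePoolLayer(l_dense, pool_size=2)
    >>> l_maxout.output_shape
    (None, 100)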
""" def __init__(self, incoming, pool_size, axis=1, pool_function=T.max, **kwargs): super(FeaturePoolLayer, self).__init__(incoming, **kwargs) self.pool_size = pool_size self.axis = axis self.pool_function = pool_function num_feature_maps = self.input_shape[self.axis] if num_feature_maps % self.pool_size != 0: raise ValueError("Number of input feature maps (%d) is not a " "multiple of the pool size (pool_size=%d)" % (num_feature_maps, self.pool_size)) def get_output_shape_for(self, input_shape): output_shape = list(input_shape) # make a mutable copy output_shape[self.axis] = input_shape[self.axis] // self.pool_size return tuple(output_shape) def get_output_for(self, input, **kwargs): input_shape = tuple(input.shape) num_feature_maps = input_shape[self.axis] num_feature_maps_out = num_feature_maps // self.pool_size pool_shape = (input_shape[:self.axis] + (num_feature_maps_out, self.pool_size) + input_shape[self.axis+1:]) input_reshaped = input.reshape(pool_shape) return self.pool_function(input_reshaped, axis=self.axis + 1) class FeatureWTALayer(Layer): """ 'Winner Take All' layer This layer performs 'Winner Take All' (WTA) across feature maps: zero out all but the maximal activation value within a region. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_size : integer the number of feature maps per region. axis : integer the axis along which the regions are formed. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. Notes ----- This layer requires that the size of the axis along which it groups units is a multiple of the pool size. """ def __init__(self, incoming, pool_size, axis=1, **kwargs): super(FeatureWTALayer, self).__init__(incoming, **kwargs) self.pool_size = pool_size self.axis = axis num_feature_maps = self.input_shape[self.axis] if num_feature_maps % self.pool_size != 0: raise ValueError("Number of input feature maps (%d) is not a " "multiple of the region size (pool_size=%d)" % (num_feature_maps, self.pool_size)) def get_output_for(self, input, **kwargs): num_feature_maps = input.shape[self.axis] num_pools = num_feature_maps // self.pool_size pool_shape = () arange_shuffle_pattern = () for k in range(self.axis): pool_shape += (input.shape[k],) arange_shuffle_pattern += ('x',) pool_shape += (num_pools, self.pool_size) arange_shuffle_pattern += ('x', 0) for k in range(self.axis + 1, input.ndim): pool_shape += (input.shape[k],) arange_shuffle_pattern += ('x',) input_reshaped = input.reshape(pool_shape) max_indices = T.argmax(input_reshaped, axis=self.axis + 1, keepdims=True) arange = T.arange(self.pool_size).dimshuffle(*arange_shuffle_pattern) mask = T.eq(max_indices, arange).reshape(input.shape) return input * mask class GlobalPoolLayer(Layer): """ lasagne.layers.GlobalPoolLayer(incoming, pool_function=theano.tensor.mean, **kwargs) Global pooling layer This layer pools globally across all trailing dimensions beyond the 2nd. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_function : callable the pooling function to use. This defaults to `theano.tensor.mean` (i.e. mean-pooling) and can be replaced by any other aggregation function. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. 
""" def __init__(self, incoming, pool_function=T.mean, **kwargs): super(GlobalPoolLayer, self).__init__(incoming, **kwargs) self.pool_function = pool_function def get_output_shape_for(self, input_shape): return input_shape[:2] def get_output_for(self, input, **kwargs): return self.pool_function(input.flatten(3), axis=2) def pool_2d_nxn_regions(inputs, output_size, mode='max'): """ Performs a pooling operation that results in a fixed size: output_size x output_size. Used by SpatialPyramidPoolingLayer. Refer to appendix A in [1] Parameters ---------- inputs : a tensor with 4 dimensions (N x C x H x W) output_size: integer The output size of the pooling operation mode : string Pooling mode, one of 'max', 'average_inc_pad', 'average_exc_pad' Defaults to 'max'. Returns a list of tensors, for each output bin. The list contains output_size*output_size elements, where each element is a 3D tensor (N x C x 1) References ---------- .. [1] He, Kaiming et al (2015): Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. http://arxiv.org/pdf/1406.4729.pdf. """ if mode == 'max': pooling_op = T.max elif mode in ['average_inc_pad', 'average_exc_pad']: pooling_op = T.mean else: msg = "Mode must be either 'max', 'average_inc_pad' or " msg += "'average_exc_pad'. Got '{0}'" raise ValueError(msg.format(mode)) h, w = inputs.shape[2:] result = [] n = float(output_size) for row in range(output_size): for col in range(output_size): start_h = T.floor(row / n * h).astype('int32') end_h = T.ceil((row + 1) / n * h).astype('int32') start_w = T.floor(col / n * w).astype('int32') end_w = T.ceil((col + 1) / n * w).astype('int32') pooling_region = inputs[:, :, start_h:end_h, start_w:end_w] this_result = pooling_op(pooling_region, axis=(2, 3)) result.append(this_result.dimshuffle(0, 1, 'x')) return result class SpatialPyramidPoolingLayer(Layer): """ Spatial Pyramid Pooling Layer Performs spatial pyramid pooling (SPP) over the input. It will turn a 2D input of arbitrary size into an output of fixed dimension. Hence, the convolutional part of a DNN can be connected to a dense part with a fixed number of nodes even if the dimensions of the input image are unknown. The pooling is performed over :math:`l` pooling levels. Each pooling level :math:`i` will create :math:`M_i` output features. :math:`M_i` is given by :math:`n_i * n_i`, with :math:`n_i` as the number of pooling operation per dimension in level :math:`i`, and we use a list of the :math:`n_i`'s as a parameter for SPP-Layer. The length of this list is the level of the spatial pyramid. Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape. pool_dims : list of integers The list of :math:`n_i`'s that define the output dimension of each pooling level :math:`i`. The length of pool_dims is the level of the spatial pyramid. mode : string Pooling mode, one of 'max', 'average_inc_pad', 'average_exc_pad' Defaults to 'max'. implementation : string Either 'fast' or 'kaiming'. The 'fast' version uses theano's pool_2d operation, which is fast but does not work for all input sizes. The 'kaiming' mode is slower but implements the pooling as described in [1], and works with any input size. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. Notes ----- This layer should be inserted between the convolutional part of a DNN and its dense part. 
Convolutions can be used for arbitrary input dimensions, but the size of their output will depend on their input dimensions. Connecting the output of the convolutional to the dense part then usually demands us to fix the dimensions of the network's InputLayer. The spatial pyramid pooling layer, however, allows us to leave the network input dimensions arbitrary. The advantage over a global pooling layer is the added robustness against object deformations due to the pooling on different scales. References ---------- .. [1] He, Kaiming et al (2015): Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. http://arxiv.org/pdf/1406.4729.pdf. """ def __init__(self, incoming, pool_dims=[4, 2, 1], mode='max', implementation='fast', **kwargs): super(SpatialPyramidPoolingLayer, self).__init__(incoming, **kwargs) if len(self.input_shape) != 4: raise ValueError("Tried to create a SPP layer with " "input shape %r. Expected 4 input dimensions " "(batchsize, channels, 2 spatial dimensions)." % (self.input_shape,)) if implementation != 'kaiming': # pragma: no cover # Check if the running theano version supports symbolic # variables as arguments for pool_2d. This is required # unless using implementation='kaiming' try: pool_2d(T.tensor4(), ws=T.ivector(), stride=T.ivector(), ignore_border=True, pad=None) except ValueError: raise ImportError("SpatialPyramidPoolingLayer with " "implementation='%s' requires a newer " "version of theano. Either update " "theano, or use implementation=" "'kaiming'" % implementation) self.mode = mode self.implementation = implementation self.pool_dims = pool_dims def get_output_for(self, input, **kwargs): input_size = tuple(symb if fixed is None else fixed for fixed, symb in zip(self.input_shape[2:], input.shape[2:])) pool_list = [] for pool_dim in self.pool_dims: if self.implementation == 'kaiming': pool_list += pool_2d_nxn_regions(input, pool_dim, mode=self.mode) else: # pragma: no cover win_size = tuple((i + pool_dim - 1) // pool_dim for i in input_size) str_size = tuple(i // pool_dim for i in input_size) pool = pool_2d(input, ws=win_size, stride=str_size, mode=self.mode, pad=None, ignore_border=True) pool = pool.flatten(3) pool_list.append(pool) return T.concatenate(pool_list, axis=2) def get_output_shape_for(self, input_shape): num_features = sum(p*p for p in self.pool_dims) return (input_shape[0], input_shape[1], num_features) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/recurrent.py0000644000175000017500000020370413307306052026361 0ustar sinclairssinclairs# -*- coding: utf-8 -*- """ Layers to construct recurrent networks. Recurrent layers can be used similarly to feed-forward layers except that the input shape is expected to be ``(batch_size, sequence_length, num_inputs)``. The CustomRecurrentLayer can also support more than one "feature" dimension (e.g. using convolutional connections), but for all other layers, dimensions trailing the third dimension are flattened. The following recurrent layers are implemented: .. currentmodule:: lasagne.layers .. autosummary:: :nosignatures: CustomRecurrentLayer RecurrentLayer LSTMLayer GRULayer For recurrent layers with gates we use a helper class to set up the parameters in each gate: .. autosummary:: :nosignatures: Gate Please refer to that class if you need to modify initial conditions of gates. Recurrent layers and feed-forward layers can be combined in the same network by using a few reshape operations; please refer to the example below. 
Examples -------- The following example demonstrates how recurrent layers can be easily mixed with feed-forward layers using :class:`ReshapeLayer` and how to build a network with variable batch size and number of time steps. >>> from lasagne.layers import * >>> num_inputs, num_units, num_classes = 10, 12, 5 >>> # By setting the first two dimensions as None, we are allowing them to vary >>> # They correspond to batch size and sequence length, so we will be able to >>> # feed in batches of varying size with sequences of varying length. >>> l_inp = InputLayer((None, None, num_inputs)) >>> # We can retrieve symbolic references to the input variable's shape, which >>> # we will later use in reshape layers. >>> batchsize, seqlen, _ = l_inp.input_var.shape >>> l_lstm = LSTMLayer(l_inp, num_units=num_units) >>> # In order to connect a recurrent layer to a dense layer, we need to >>> # flatten the first two dimensions (our "sample dimensions"); this will >>> # cause each time step of each sequence to be processed independently >>> l_shp = ReshapeLayer(l_lstm, (-1, num_units)) >>> l_dense = DenseLayer(l_shp, num_units=num_classes) >>> # To reshape back to our original shape, we can use the symbolic shape >>> # variables we retrieved above. >>> l_out = ReshapeLayer(l_dense, (batchsize, seqlen, num_classes)) """ import numpy as np import theano import theano.tensor as T from .. import nonlinearities from .. import init from ..utils import unroll_scan from .base import MergeLayer, Layer from .input import InputLayer from .dense import DenseLayer from . import helper __all__ = [ "CustomRecurrentLayer", "RecurrentLayer", "Gate", "LSTMLayer", "GRULayer" ] class CustomRecurrentLayer(MergeLayer): """ lasagne.layers.recurrent.CustomRecurrentLayer(incoming, input_to_hidden, hidden_to_hidden, nonlinearity=lasagne.nonlinearities.rectify, hid_init=lasagne.init.Constant(0.), backwards=False, learn_init=False, gradient_steps=-1, grad_clipping=0, unroll_scan=False, precompute_input=True, mask_input=None, only_return_final=False, **kwargs) A layer which implements a recurrent connection. This layer allows you to specify custom input-to-hidden and hidden-to-hidden connections by instantiating :class:`lasagne.layers.Layer` instances and passing them on initialization. Note that these connections can consist of multiple layers chained together. The output shape for the provided input-to-hidden and hidden-to-hidden connections must be the same. If you are looking for a standard, densely-connected recurrent layer, please see :class:`RecurrentLayer`. The output is computed by .. math :: h_t = \sigma(f_i(x_t) + f_h(h_{t-1})) Parameters ---------- incoming : a :class:`lasagne.layers.Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. input_to_hidden : :class:`lasagne.layers.Layer` :class:`lasagne.layers.Layer` instance which connects input to the hidden state (:math:`f_i`). This layer may be connected to a chain of layers, which must end in a :class:`lasagne.layers.InputLayer` with the same input shape as `incoming`, except for the first dimension: When ``precompute_input == True`` (the default), it must be ``incoming.output_shape[0]*incoming.output_shape[1]`` or ``None``; when ``precompute_input == False``, it must be ``incoming.output_shape[0]`` or ``None``. hidden_to_hidden : :class:`lasagne.layers.Layer` Layer which connects the previous hidden state to the new state (:math:`f_h`). 
This layer may be connected to a chain of layers, which must end in a :class:`lasagne.layers.InputLayer` with the same input shape as `hidden_to_hidden`'s output shape. nonlinearity : callable or None Nonlinearity to apply when computing new state (:math:`\sigma`). If None is provided, no nonlinearity will be applied. hid_init : callable, np.ndarray, theano.shared or :class:`Layer` Initializer for initial hidden state (:math:`h_0`). backwards : bool If True, process the sequence backwards and then reverse the output again such that the output from the layer is always from :math:`x_1` to :math:`x_n`. learn_init : bool If True, initial hidden values are learned. gradient_steps : int Number of timesteps to include in the backpropagated gradient. If -1, backpropagate through the entire sequence. grad_clipping : float If nonzero, the gradient messages are clipped to the given value during the backward pass. See [1]_ (p. 6) for further explanation. unroll_scan : bool If True the recursion is unrolled instead of using scan. For some graphs this gives a significant speed up but it might also consume more memory. When `unroll_scan` is True, backpropagation always includes the full sequence, so `gradient_steps` must be set to -1 and the input sequence length must be known at compile time (i.e., cannot be given as None). precompute_input : bool If True, precompute input_to_hid before iterating through the sequence. This can result in a speedup at the expense of an increase in memory usage. mask_input : :class:`lasagne.layers.Layer` Layer which allows for a sequence mask to be input, for when sequences are of variable length. Default `None`, which means no mask will be supplied (i.e. all sequences are of the same length). only_return_final : bool If True, only return the final sequential output (e.g. for tasks where a single target value for the entire sequence is desired). In this case, Theano makes an optimization which saves memory. Examples -------- The following example constructs a simple `CustomRecurrentLayer` which has dense input-to-hidden and hidden-to-hidden connections. >>> import lasagne >>> n_batch, n_steps, n_in = (2, 3, 4) >>> n_hid = 5 >>> l_in = lasagne.layers.InputLayer((n_batch, n_steps, n_in)) >>> l_in_hid = lasagne.layers.DenseLayer( ... lasagne.layers.InputLayer((None, n_in)), n_hid) >>> l_hid_hid = lasagne.layers.DenseLayer( ... lasagne.layers.InputLayer((None, n_hid)), n_hid) >>> l_rec = lasagne.layers.CustomRecurrentLayer(l_in, l_in_hid, l_hid_hid) The CustomRecurrentLayer can also support "convolutional recurrence", as is demonstrated below. >>> n_batch, n_steps, n_channels, width, height = (2, 3, 4, 5, 6) >>> n_out_filters = 7 >>> filter_shape = (3, 3) >>> l_in = lasagne.layers.InputLayer( ... (n_batch, n_steps, n_channels, width, height)) >>> l_in_to_hid = lasagne.layers.Conv2DLayer( ... lasagne.layers.InputLayer((None, n_channels, width, height)), ... n_out_filters, filter_shape, pad='same') >>> l_hid_to_hid = lasagne.layers.Conv2DLayer( ... lasagne.layers.InputLayer(l_in_to_hid.output_shape), ... n_out_filters, filter_shape, pad='same') >>> l_rec = lasagne.layers.CustomRecurrentLayer( ... l_in, l_in_to_hid, l_hid_to_hid) References ---------- .. [1] Graves, Alex: "Generating sequences with recurrent neural networks." arXiv preprint arXiv:1308.0850 (2013). 
""" def __init__(self, incoming, input_to_hidden, hidden_to_hidden, nonlinearity=nonlinearities.rectify, hid_init=init.Constant(0.), backwards=False, learn_init=False, gradient_steps=-1, grad_clipping=0, unroll_scan=False, precompute_input=True, mask_input=None, only_return_final=False, **kwargs): # This layer inherits from a MergeLayer, because it can have three # inputs - the layer input, the mask and the initial hidden state. We # will just provide the layer input as incomings, unless a mask input # or initial hidden state was provided. incomings = [incoming] self.mask_incoming_index = -1 self.hid_init_incoming_index = -1 if mask_input is not None: incomings.append(mask_input) self.mask_incoming_index = len(incomings)-1 if isinstance(hid_init, Layer): incomings.append(hid_init) self.hid_init_incoming_index = len(incomings)-1 super(CustomRecurrentLayer, self).__init__(incomings, **kwargs) input_to_hidden_in_layers = \ [layer for layer in helper.get_all_layers(input_to_hidden) if isinstance(layer, InputLayer)] if len(input_to_hidden_in_layers) != 1: raise ValueError( '`input_to_hidden` must have exactly one InputLayer, but it ' 'has {}'.format(len(input_to_hidden_in_layers))) hidden_to_hidden_in_lyrs = \ [layer for layer in helper.get_all_layers(hidden_to_hidden) if isinstance(layer, InputLayer)] if len(hidden_to_hidden_in_lyrs) != 1: raise ValueError( '`hidden_to_hidden` must have exactly one InputLayer, but it ' 'has {}'.format(len(hidden_to_hidden_in_lyrs))) hidden_to_hidden_in_layer = hidden_to_hidden_in_lyrs[0] self.input_to_hidden = input_to_hidden self.hidden_to_hidden = hidden_to_hidden self.learn_init = learn_init self.backwards = backwards self.gradient_steps = gradient_steps self.grad_clipping = grad_clipping self.unroll_scan = unroll_scan self.precompute_input = precompute_input self.only_return_final = only_return_final if unroll_scan and gradient_steps != -1: raise ValueError( "Gradient steps must be -1 when unroll_scan is true.") # Retrieve the dimensionality of the incoming layer input_shape = self.input_shapes[0] if unroll_scan and input_shape[1] is None: raise ValueError("Input sequence length cannot be specified as " "None when unroll_scan is True") # Check that the input_to_hidden connection can appropriately handle # a first dimension of input_shape[0]*input_shape[1] when we will # precompute the input dot product if (self.precompute_input and input_to_hidden.output_shape[0] is not None and input_shape[0] is not None and input_shape[1] is not None and (input_to_hidden.output_shape[0] != input_shape[0]*input_shape[1])): raise ValueError( 'When precompute_input == True, ' 'input_to_hidden.output_shape[0] must equal ' 'incoming.output_shape[0]*incoming.output_shape[1] ' '(i.e. 
batch_size*sequence_length) or be None but ' 'input_to_hidden.output_shape[0] = {} and ' 'incoming.output_shape[0]*incoming.output_shape[1] = ' '{}'.format(input_to_hidden.output_shape[0], input_shape[0]*input_shape[1])) # Check that the first dimension of input_to_hidden and # hidden_to_hidden's outputs match when we won't precompute the input # dot product if (not self.precompute_input and input_to_hidden.output_shape[0] is not None and hidden_to_hidden.output_shape[0] is not None and (input_to_hidden.output_shape[0] != hidden_to_hidden.output_shape[0])): raise ValueError( 'When precompute_input == False, ' 'input_to_hidden.output_shape[0] must equal ' 'hidden_to_hidden.output_shape[0] but ' 'input_to_hidden.output_shape[0] = {} and ' 'hidden_to_hidden.output_shape[0] = {}'.format( input_to_hidden.output_shape[0], hidden_to_hidden.output_shape[0])) # Check that input_to_hidden and hidden_to_hidden output shapes match, # but don't check a dimension if it's None for either shape if not all(s1 is None or s2 is None or s1 == s2 for s1, s2 in zip(input_to_hidden.output_shape[1:], hidden_to_hidden.output_shape[1:])): raise ValueError("The output shape for input_to_hidden and " "hidden_to_hidden must be equal after the first " "dimension, but input_to_hidden.output_shape={} " "and hidden_to_hidden.output_shape={}".format( input_to_hidden.output_shape, hidden_to_hidden.output_shape)) # Check that input_to_hidden's output shape is the same as # hidden_to_hidden's input shape but don't check a dimension if it's # None for either shape h_to_h_input_shape = hidden_to_hidden_in_layer.output_shape if not all(s1 is None or s2 is None or s1 == s2 for s1, s2 in zip(input_to_hidden.output_shape[1:], h_to_h_input_shape[1:])): raise ValueError( "The output shape for input_to_hidden must be equal to the " "input shape of hidden_to_hidden after the first dimension, " "but input_to_hidden.output_shape={} and " "hidden_to_hidden:input_layer.shape={}".format( input_to_hidden.output_shape, h_to_h_input_shape)) if nonlinearity is None: self.nonlinearity = nonlinearities.identity else: self.nonlinearity = nonlinearity # Initialize hidden state if isinstance(hid_init, Layer): self.hid_init = hid_init else: self.hid_init = self.add_param( hid_init, (1,) + hidden_to_hidden.output_shape[1:], name="hid_init", trainable=learn_init, regularizable=False) def get_params(self, **tags): # Get all parameters from this layer, the master layer params = super(CustomRecurrentLayer, self).get_params(**tags) # Combine with all parameters from the child layers params += helper.get_all_params(self.input_to_hidden, **tags) params += helper.get_all_params(self.hidden_to_hidden, **tags) return params def get_output_shape_for(self, input_shapes): # The shape of the input to this layer will be the first element # of input_shapes, whether or not a mask input is being used. input_shape = input_shapes[0] # When only_return_final is true, the second (sequence step) dimension # will be flattened if self.only_return_final: return (input_shape[0],) + self.hidden_to_hidden.output_shape[1:] # Otherwise, the shape will be (n_batch, n_steps, trailing_dims...) else: return ((input_shape[0], input_shape[1]) + self.hidden_to_hidden.output_shape[1:]) def get_output_for(self, inputs, **kwargs): """ Compute this layer's output function given a symbolic input variable. Parameters ---------- inputs : list of theano.TensorType `inputs[0]` should always be the symbolic input variable. When this layer has a mask input (i.e. 
was instantiated with `mask_input != None`, indicating that the lengths of sequences in each batch vary), `inputs` should have length 2, where `inputs[1]` is the `mask`. The `mask` should be supplied as a Theano variable denoting whether each time step in each sequence in the batch is part of the sequence or not. `mask` should be a matrix of shape ``(n_batch, n_time_steps)`` where ``mask[i, j] = 1`` when ``j <= (length of sequence i)`` and ``mask[i, j] = 0`` when ``j > (length of sequence i)``. When the hidden state of this layer is to be pre-filled (i.e. was set to a :class:`Layer` instance) `inputs` should have length at least 2, and `inputs[-1]` is the hidden state to prefill with. Returns ------- layer_output : theano.TensorType Symbolic output variable. """ # Retrieve the layer input input = inputs[0] # Retrieve the mask when it is supplied mask = None hid_init = None if self.mask_incoming_index > 0: mask = inputs[self.mask_incoming_index] if self.hid_init_incoming_index > 0: hid_init = inputs[self.hid_init_incoming_index] # Input should be provided as (n_batch, n_time_steps, n_features) # but scan requires the iterable dimension to be first # So, we need to dimshuffle to (n_time_steps, n_batch, n_features) input = input.dimshuffle(1, 0, *range(2, input.ndim)) seq_len, num_batch = input.shape[0], input.shape[1] if self.precompute_input: # Because the input is given for all time steps, we can precompute # the inputs to hidden before scanning. First we need to reshape # from (seq_len, batch_size, trailing dimensions...) to # (seq_len*batch_size, trailing dimensions...) # This strange use of a generator in a tuple was because # input.shape[2:] was raising a Theano error trailing_dims = tuple(input.shape[n] for n in range(2, input.ndim)) input = T.reshape(input, (seq_len*num_batch,) + trailing_dims) input = helper.get_output( self.input_to_hidden, input, **kwargs) # Reshape back to (seq_len, batch_size, trailing dimensions...) trailing_dims = tuple(input.shape[n] for n in range(1, input.ndim)) input = T.reshape(input, (seq_len, num_batch) + trailing_dims) # We will always pass the hidden-to-hidden layer params to step non_seqs = helper.get_all_params(self.hidden_to_hidden) # When we are not precomputing the input, we also need to pass the # input-to-hidden parameters to step if not self.precompute_input: non_seqs += helper.get_all_params(self.input_to_hidden) # Create single recurrent computation step function def step(input_n, hid_previous, *args): # Compute the hidden-to-hidden activation hid_pre = helper.get_output( self.hidden_to_hidden, hid_previous, **kwargs) # If the dot product is precomputed then add it, otherwise # calculate the input_to_hidden values and add them if self.precompute_input: hid_pre += input_n else: hid_pre += helper.get_output( self.input_to_hidden, input_n, **kwargs) # Clip gradients if self.grad_clipping: hid_pre = theano.gradient.grad_clip( hid_pre, -self.grad_clipping, self.grad_clipping) return self.nonlinearity(hid_pre) def step_masked(input_n, mask_n, hid_previous, *args): # Skip over any input with mask 0 by copying the previous # hidden state; proceed normally for any input with mask 1. 
hid = step(input_n, hid_previous, *args) hid_out = T.switch(mask_n, hid, hid_previous) return [hid_out] if mask is not None: mask = mask.dimshuffle(1, 0, 'x') sequences = [input, mask] step_fun = step_masked else: sequences = input step_fun = step if not isinstance(self.hid_init, Layer): # The code below simply repeats self.hid_init num_batch times in # its first dimension. Turns out using a dot product and a # dimshuffle is faster than T.repeat. dot_dims = (list(range(1, self.hid_init.ndim - 1)) + [0, self.hid_init.ndim - 1]) hid_init = T.dot(T.ones((num_batch, 1)), self.hid_init.dimshuffle(dot_dims)) if self.unroll_scan: # Retrieve the dimensionality of the incoming layer input_shape = self.input_shapes[0] # Explicitly unroll the recurrence instead of using scan hid_out = unroll_scan( fn=step_fun, sequences=sequences, outputs_info=[hid_init], go_backwards=self.backwards, non_sequences=non_seqs, n_steps=input_shape[1])[0] else: # Scan op iterates over first dimension of input and repeatedly # applies the step function hid_out = theano.scan( fn=step_fun, sequences=sequences, go_backwards=self.backwards, outputs_info=[hid_init], non_sequences=non_seqs, truncate_gradient=self.gradient_steps, strict=True)[0] # When it is requested that we only return the final sequence step, # we need to slice it out immediately after scan is applied if self.only_return_final: hid_out = hid_out[-1] else: # dimshuffle back to (n_batch, n_time_steps, n_features)) hid_out = hid_out.dimshuffle(1, 0, *range(2, hid_out.ndim)) # if scan is backward reverse the output if self.backwards: hid_out = hid_out[:, ::-1] return hid_out class RecurrentLayer(CustomRecurrentLayer): """ lasagne.layers.recurrent.RecurrentLayer(incoming, num_units, W_in_to_hid=lasagne.init.Uniform(), W_hid_to_hid=lasagne.init.Uniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, hid_init=lasagne.init.Constant(0.), backwards=False, learn_init=False, gradient_steps=-1, grad_clipping=0, unroll_scan=False, precompute_input=True, mask_input=None, only_return_final=False, **kwargs) Dense recurrent neural network (RNN) layer A "vanilla" RNN layer, which has dense input-to-hidden and hidden-to-hidden connections. The output is computed as .. math :: h_t = \sigma(x_t W_x + h_{t-1} W_h + b) Parameters ---------- incoming : a :class:`lasagne.layers.Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. num_units : int Number of hidden units in the layer. W_in_to_hid : Theano shared variable, numpy array or callable Initializer for input-to-hidden weight matrix (:math:`W_x`). W_hid_to_hid : Theano shared variable, numpy array or callable Initializer for hidden-to-hidden weight matrix (:math:`W_h`). b : Theano shared variable, numpy array, callable or None Initializer for bias vector (:math:`b`). If None is provided there will be no bias. nonlinearity : callable or None Nonlinearity to apply when computing new state (:math:`\sigma`). If None is provided, no nonlinearity will be applied. hid_init : callable, np.ndarray, theano.shared or :class:`Layer` Initializer for initial hidden state (:math:`h_0`). backwards : bool If True, process the sequence backwards and then reverse the output again such that the output from the layer is always from :math:`x_1` to :math:`x_n`. learn_init : bool If True, initial hidden values are learned. gradient_steps : int Number of timesteps to include in the backpropagated gradient. If -1, backpropagate through the entire sequence. 
grad_clipping : float If nonzero, the gradient messages are clipped to the given value during the backward pass. See [1]_ (p. 6) for further explanation. unroll_scan : bool If True the recursion is unrolled instead of using scan. For some graphs this gives a significant speed up but it might also consume more memory. When `unroll_scan` is True, backpropagation always includes the full sequence, so `gradient_steps` must be set to -1 and the input sequence length must be known at compile time (i.e., cannot be given as None). precompute_input : bool If True, precompute input_to_hid before iterating through the sequence. This can result in a speedup at the expense of an increase in memory usage. mask_input : :class:`lasagne.layers.Layer` Layer which allows for a sequence mask to be input, for when sequences are of variable length. Default `None`, which means no mask will be supplied (i.e. all sequences are of the same length). only_return_final : bool If True, only return the final sequential output (e.g. for tasks where a single target value for the entire sequence is desired). In this case, Theano makes an optimization which saves memory. References ---------- .. [1] Graves, Alex: "Generating sequences with recurrent neural networks." arXiv preprint arXiv:1308.0850 (2013). """ def __init__(self, incoming, num_units, W_in_to_hid=init.Uniform(), W_hid_to_hid=init.Uniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, hid_init=init.Constant(0.), backwards=False, learn_init=False, gradient_steps=-1, grad_clipping=0, unroll_scan=False, precompute_input=True, mask_input=None, only_return_final=False, **kwargs): if isinstance(incoming, tuple): input_shape = incoming else: input_shape = incoming.output_shape # Retrieve the supplied name, if it exists; otherwise use '' if 'name' in kwargs: basename = kwargs['name'] + '.' # Create a separate version of kwargs for the contained layers # which does not include 'name' layer_kwargs = dict((key, arg) for key, arg in kwargs.items() if key != 'name') else: basename = '' layer_kwargs = kwargs # We will be passing the input at each time step to the dense layer, # so we need to remove the second dimension (the time dimension) in_to_hid = DenseLayer(InputLayer((None,) + input_shape[2:]), num_units, W=W_in_to_hid, b=b, nonlinearity=None, name=basename + 'input_to_hidden', **layer_kwargs) # The hidden-to-hidden layer expects its inputs to have num_units # features because it recycles the previous hidden state hid_to_hid = DenseLayer(InputLayer((None, num_units)), num_units, W=W_hid_to_hid, b=None, nonlinearity=None, name=basename + 'hidden_to_hidden', **layer_kwargs) # Make child layer parameters intuitively accessible self.W_in_to_hid = in_to_hid.W self.W_hid_to_hid = hid_to_hid.W self.b = in_to_hid.b # Just use the CustomRecurrentLayer with the DenseLayers we created super(RecurrentLayer, self).__init__( incoming, in_to_hid, hid_to_hid, nonlinearity=nonlinearity, hid_init=hid_init, backwards=backwards, learn_init=learn_init, gradient_steps=gradient_steps, grad_clipping=grad_clipping, unroll_scan=unroll_scan, precompute_input=precompute_input, mask_input=mask_input, only_return_final=only_return_final, **kwargs) class Gate(object): """ lasagne.layers.recurrent.Gate(W_in=lasagne.init.Normal(0.1), W_hid=lasagne.init.Normal(0.1), W_cell=lasagne.init.Normal(0.1), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.sigmoid) Simple class to hold the parameters for a gate connection. 
We define a gate loosely as something which computes the linear mix of two inputs, optionally computes an element-wise product with a third, adds a bias, and applies a nonlinearity. Parameters ---------- W_in : Theano shared variable, numpy array or callable Initializer for input-to-gate weight matrix. W_hid : Theano shared variable, numpy array or callable Initializer for hidden-to-gate weight matrix. W_cell : Theano shared variable, numpy array, callable, or None Initializer for cell-to-gate weight vector. If None, no cell-to-gate weight vector will be stored. b : Theano shared variable, numpy array or callable Initializer for input gate bias vector. nonlinearity : callable or None The nonlinearity that is applied to the input gate activation. If None is provided, no nonlinearity will be applied. Examples -------- For :class:`LSTMLayer` the bias of the forget gate is often initialized to a large positive value to encourage the layer initially remember the cell value, see e.g. [1]_ page 15. >>> import lasagne >>> forget_gate = Gate(b=lasagne.init.Constant(5.0)) >>> l_lstm = LSTMLayer((10, 20, 30), num_units=10, ... forgetgate=forget_gate) References ---------- .. [1] Gers, Felix A., Jürgen Schmidhuber, and Fred Cummins. "Learning to forget: Continual prediction with LSTM." Neural computation 12.10 (2000): 2451-2471. """ def __init__(self, W_in=init.Normal(0.1), W_hid=init.Normal(0.1), W_cell=init.Normal(0.1), b=init.Constant(0.), nonlinearity=nonlinearities.sigmoid): self.W_in = W_in self.W_hid = W_hid # Don't store a cell weight vector when cell is None if W_cell is not None: self.W_cell = W_cell self.b = b # For the nonlinearity, if None is supplied, use identity if nonlinearity is None: self.nonlinearity = nonlinearities.identity else: self.nonlinearity = nonlinearity class LSTMLayer(MergeLayer): r""" lasagne.layers.recurrent.LSTMLayer(incoming, num_units, ingate=lasagne.layers.Gate(), forgetgate=lasagne.layers.Gate(), cell=lasagne.layers.Gate( W_cell=None, nonlinearity=lasagne.nonlinearities.tanh), outgate=lasagne.layers.Gate(), nonlinearity=lasagne.nonlinearities.tanh, cell_init=lasagne.init.Constant(0.), hid_init=lasagne.init.Constant(0.), backwards=False, learn_init=False, peepholes=True, gradient_steps=-1, grad_clipping=0, unroll_scan=False, precompute_input=True, mask_input=None, only_return_final=False, **kwargs) A long short-term memory (LSTM) layer. Includes optional "peephole connections" and a forget gate. Based on the definition in [1]_, which is the current common definition. The output is computed by .. math :: i_t &= \sigma_i(x_t W_{xi} + h_{t-1} W_{hi} + w_{ci} \odot c_{t-1} + b_i)\\ f_t &= \sigma_f(x_t W_{xf} + h_{t-1} W_{hf} + w_{cf} \odot c_{t-1} + b_f)\\ c_t &= f_t \odot c_{t - 1} + i_t \odot \sigma_c(x_t W_{xc} + h_{t-1} W_{hc} + b_c)\\ o_t &= \sigma_o(x_t W_{xo} + h_{t-1} W_{ho} + w_{co} \odot c_t + b_o)\\ h_t &= o_t \odot \sigma_h(c_t) Parameters ---------- incoming : a :class:`lasagne.layers.Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. num_units : int Number of hidden/cell units in the layer. ingate : Gate Parameters for the input gate (:math:`i_t`): :math:`W_{xi}`, :math:`W_{hi}`, :math:`w_{ci}`, :math:`b_i`, and :math:`\sigma_i`. forgetgate : Gate Parameters for the forget gate (:math:`f_t`): :math:`W_{xf}`, :math:`W_{hf}`, :math:`w_{cf}`, :math:`b_f`, and :math:`\sigma_f`. cell : Gate Parameters for the cell computation (:math:`c_t`): :math:`W_{xc}`, :math:`W_{hc}`, :math:`b_c`, and :math:`\sigma_c`. 
outgate : Gate Parameters for the output gate (:math:`o_t`): :math:`W_{xo}`, :math:`W_{ho}`, :math:`w_{co}`, :math:`b_o`, and :math:`\sigma_o`. nonlinearity : callable or None The nonlinearity that is applied to the output (:math:`\sigma_h`). If None is provided, no nonlinearity will be applied. cell_init : callable, np.ndarray, theano.shared or :class:`Layer` Initializer for initial cell state (:math:`c_0`). hid_init : callable, np.ndarray, theano.shared or :class:`Layer` Initializer for initial hidden state (:math:`h_0`). backwards : bool If True, process the sequence backwards and then reverse the output again such that the output from the layer is always from :math:`x_1` to :math:`x_n`. learn_init : bool If True, initial hidden values are learned. peepholes : bool If True, the LSTM uses peephole connections. When False, `ingate.W_cell`, `forgetgate.W_cell` and `outgate.W_cell` are ignored. gradient_steps : int Number of timesteps to include in the backpropagated gradient. If -1, backpropagate through the entire sequence. grad_clipping : float If nonzero, the gradient messages are clipped to the given value during the backward pass. See [1]_ (p. 6) for further explanation. unroll_scan : bool If True the recursion is unrolled instead of using scan. For some graphs this gives a significant speed up but it might also consume more memory. When `unroll_scan` is True, backpropagation always includes the full sequence, so `gradient_steps` must be set to -1 and the input sequence length must be known at compile time (i.e., cannot be given as None). precompute_input : bool If True, precompute input_to_hid before iterating through the sequence. This can result in a speedup at the expense of an increase in memory usage. mask_input : :class:`lasagne.layers.Layer` Layer which allows for a sequence mask to be input, for when sequences are of variable length. Default `None`, which means no mask will be supplied (i.e. all sequences are of the same length). only_return_final : bool If True, only return the final sequential output (e.g. for tasks where a single target value for the entire sequence is desired). In this case, Theano makes an optimization which saves memory. References ---------- .. [1] Graves, Alex: "Generating sequences with recurrent neural networks." arXiv preprint arXiv:1308.0850 (2013). """ def __init__(self, incoming, num_units, ingate=Gate(), forgetgate=Gate(), cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh), outgate=Gate(), nonlinearity=nonlinearities.tanh, cell_init=init.Constant(0.), hid_init=init.Constant(0.), backwards=False, learn_init=False, peepholes=True, gradient_steps=-1, grad_clipping=0, unroll_scan=False, precompute_input=True, mask_input=None, only_return_final=False, **kwargs): # This layer inherits from a MergeLayer, because it can have four # inputs - the layer input, the mask, the initial hidden state and the # inital cell state. We will just provide the layer input as incomings, # unless a mask input, inital hidden state or initial cell state was # provided. 
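        # Clarifying note (added): index 0 of `incomings` is always the layer
        # input; the *_incoming_index attributes below start at -1 ("not
        # given") and are set to the position of the corresponding extra
        # input when one is provided, so get_output_for() can later pick the
        # mask / initial states out of its `inputs` list by index.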
incomings = [incoming] self.mask_incoming_index = -1 self.hid_init_incoming_index = -1 self.cell_init_incoming_index = -1 if mask_input is not None: incomings.append(mask_input) self.mask_incoming_index = len(incomings)-1 if isinstance(hid_init, Layer): incomings.append(hid_init) self.hid_init_incoming_index = len(incomings)-1 if isinstance(cell_init, Layer): incomings.append(cell_init) self.cell_init_incoming_index = len(incomings)-1 # Initialize parent layer super(LSTMLayer, self).__init__(incomings, **kwargs) # If the provided nonlinearity is None, make it linear if nonlinearity is None: self.nonlinearity = nonlinearities.identity else: self.nonlinearity = nonlinearity self.learn_init = learn_init self.num_units = num_units self.backwards = backwards self.peepholes = peepholes self.gradient_steps = gradient_steps self.grad_clipping = grad_clipping self.unroll_scan = unroll_scan self.precompute_input = precompute_input self.only_return_final = only_return_final if unroll_scan and gradient_steps != -1: raise ValueError( "Gradient steps must be -1 when unroll_scan is true.") # Retrieve the dimensionality of the incoming layer input_shape = self.input_shapes[0] if unroll_scan and input_shape[1] is None: raise ValueError("Input sequence length cannot be specified as " "None when unroll_scan is True") num_inputs = np.prod(input_shape[2:]) def add_gate_params(gate, gate_name): """ Convenience function for adding layer parameters from a Gate instance. """ return (self.add_param(gate.W_in, (num_inputs, num_units), name="W_in_to_{}".format(gate_name)), self.add_param(gate.W_hid, (num_units, num_units), name="W_hid_to_{}".format(gate_name)), self.add_param(gate.b, (num_units,), name="b_{}".format(gate_name), regularizable=False), gate.nonlinearity) # Add in parameters from the supplied Gate instances (self.W_in_to_ingate, self.W_hid_to_ingate, self.b_ingate, self.nonlinearity_ingate) = add_gate_params(ingate, 'ingate') (self.W_in_to_forgetgate, self.W_hid_to_forgetgate, self.b_forgetgate, self.nonlinearity_forgetgate) = add_gate_params(forgetgate, 'forgetgate') (self.W_in_to_cell, self.W_hid_to_cell, self.b_cell, self.nonlinearity_cell) = add_gate_params(cell, 'cell') (self.W_in_to_outgate, self.W_hid_to_outgate, self.b_outgate, self.nonlinearity_outgate) = add_gate_params(outgate, 'outgate') # If peephole (cell to gate) connections were enabled, initialize # peephole connections. These are elementwise products with the cell # state, so they are represented as vectors. if self.peepholes: self.W_cell_to_ingate = self.add_param( ingate.W_cell, (num_units, ), name="W_cell_to_ingate") self.W_cell_to_forgetgate = self.add_param( forgetgate.W_cell, (num_units, ), name="W_cell_to_forgetgate") self.W_cell_to_outgate = self.add_param( outgate.W_cell, (num_units, ), name="W_cell_to_outgate") # Setup initial values for the cell and the hidden units if isinstance(cell_init, Layer): self.cell_init = cell_init else: self.cell_init = self.add_param( cell_init, (1, num_units), name="cell_init", trainable=learn_init, regularizable=False) if isinstance(hid_init, Layer): self.hid_init = hid_init else: self.hid_init = self.add_param( hid_init, (1, self.num_units), name="hid_init", trainable=learn_init, regularizable=False) def get_output_shape_for(self, input_shapes): # The shape of the input to this layer will be the first element # of input_shapes, whether or not a mask input is being used. 
input_shape = input_shapes[0] # When only_return_final is true, the second (sequence step) dimension # will be flattened if self.only_return_final: return input_shape[0], self.num_units # Otherwise, the shape will be (n_batch, n_steps, num_units) else: return input_shape[0], input_shape[1], self.num_units def get_output_for(self, inputs, **kwargs): """ Compute this layer's output function given a symbolic input variable Parameters ---------- inputs : list of theano.TensorType `inputs[0]` should always be the symbolic input variable. When this layer has a mask input (i.e. was instantiated with `mask_input != None`, indicating that the lengths of sequences in each batch vary), `inputs` should have length 2, where `inputs[1]` is the `mask`. The `mask` should be supplied as a Theano variable denoting whether each time step in each sequence in the batch is part of the sequence or not. `mask` should be a matrix of shape ``(n_batch, n_time_steps)`` where ``mask[i, j] = 1`` when ``j <= (length of sequence i)`` and ``mask[i, j] = 0`` when ``j > (length of sequence i)``. When the hidden state of this layer is to be pre-filled (i.e. was set to a :class:`Layer` instance) `inputs` should have length at least 2, and `inputs[-1]` is the hidden state to prefill with. When the cell state of this layer is to be pre-filled (i.e. was set to a :class:`Layer` instance) `inputs` should have length at least 2, and `inputs[-1]` is the hidden state to prefill with. When both the cell state and the hidden state are being pre-filled `inputs[-2]` is the hidden state, while `inputs[-1]` is the cell state. Returns ------- layer_output : theano.TensorType Symbolic output variable. """ # Retrieve the layer input input = inputs[0] # Retrieve the mask when it is supplied mask = None hid_init = None cell_init = None if self.mask_incoming_index > 0: mask = inputs[self.mask_incoming_index] if self.hid_init_incoming_index > 0: hid_init = inputs[self.hid_init_incoming_index] if self.cell_init_incoming_index > 0: cell_init = inputs[self.cell_init_incoming_index] # Treat all dimensions after the second as flattened feature dimensions if input.ndim > 3: input = T.flatten(input, 3) # Because scan iterates over the first dimension we dimshuffle to # (n_time_steps, n_batch, n_features) input = input.dimshuffle(1, 0, 2) seq_len, num_batch, _ = input.shape # Stack input weight matrices into a (num_inputs, 4*num_units) # matrix, which speeds up computation W_in_stacked = T.concatenate( [self.W_in_to_ingate, self.W_in_to_forgetgate, self.W_in_to_cell, self.W_in_to_outgate], axis=1) # Same for hidden weight matrices W_hid_stacked = T.concatenate( [self.W_hid_to_ingate, self.W_hid_to_forgetgate, self.W_hid_to_cell, self.W_hid_to_outgate], axis=1) # Stack biases into a (4*num_units) vector b_stacked = T.concatenate( [self.b_ingate, self.b_forgetgate, self.b_cell, self.b_outgate], axis=0) if self.precompute_input: # Because the input is given for all time steps, we can # precompute_input the inputs dot weight matrices before scanning. # W_in_stacked is (n_features, 4*num_units). input is then # (n_time_steps, n_batch, 4*num_units). input = T.dot(input, W_in_stacked) + b_stacked # When theano.scan calls step, input_n will be (n_batch, 4*num_units). 
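        # Note (added for clarity): the stacked matrices follow the
        # concatenation order above, so columns [0:num_units] belong to the
        # input gate, [num_units:2*num_units] to the forget gate,
        # [2*num_units:3*num_units] to the cell input and
        # [3*num_units:4*num_units] to the output gate; slice_w() below
        # extracts these blocks by index.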
# We define a slicing function that extract the input to each LSTM gate def slice_w(x, n): s = x[:, n*self.num_units:(n+1)*self.num_units] if self.num_units == 1: s = T.addbroadcast(s, 1) # Theano cannot infer this by itself return s # Create single recurrent computation step function # input_n is the n'th vector of the input def step(input_n, cell_previous, hid_previous, *args): if not self.precompute_input: input_n = T.dot(input_n, W_in_stacked) + b_stacked # Calculate gates pre-activations and slice gates = input_n + T.dot(hid_previous, W_hid_stacked) # Clip gradients if self.grad_clipping: gates = theano.gradient.grad_clip( gates, -self.grad_clipping, self.grad_clipping) # Extract the pre-activation gate values ingate = slice_w(gates, 0) forgetgate = slice_w(gates, 1) cell_input = slice_w(gates, 2) outgate = slice_w(gates, 3) if self.peepholes: # Compute peephole connections ingate += cell_previous*self.W_cell_to_ingate forgetgate += cell_previous*self.W_cell_to_forgetgate # Apply nonlinearities ingate = self.nonlinearity_ingate(ingate) forgetgate = self.nonlinearity_forgetgate(forgetgate) cell_input = self.nonlinearity_cell(cell_input) # Compute new cell value cell = forgetgate*cell_previous + ingate*cell_input if self.peepholes: outgate += cell*self.W_cell_to_outgate outgate = self.nonlinearity_outgate(outgate) # Compute new hidden unit activation hid = outgate*self.nonlinearity(cell) return [cell, hid] def step_masked(input_n, mask_n, cell_previous, hid_previous, *args): cell, hid = step(input_n, cell_previous, hid_previous, *args) # Skip over any input with mask 0 by copying the previous # hidden state; proceed normally for any input with mask 1. cell = T.switch(mask_n, cell, cell_previous) hid = T.switch(mask_n, hid, hid_previous) return [cell, hid] if mask is not None: # mask is given as (batch_size, seq_len). 
Because scan iterates # over first dimension, we dimshuffle to (seq_len, batch_size) and # add a broadcastable dimension mask = mask.dimshuffle(1, 0, 'x') sequences = [input, mask] step_fun = step_masked else: sequences = input step_fun = step ones = T.ones((num_batch, 1)) if not isinstance(self.cell_init, Layer): # Dot against a 1s vector to repeat to shape (num_batch, num_units) cell_init = T.dot(ones, self.cell_init) if not isinstance(self.hid_init, Layer): # Dot against a 1s vector to repeat to shape (num_batch, num_units) hid_init = T.dot(ones, self.hid_init) # The hidden-to-hidden weight matrix is always used in step non_seqs = [W_hid_stacked] # The "peephole" weight matrices are only used when self.peepholes=True if self.peepholes: non_seqs += [self.W_cell_to_ingate, self.W_cell_to_forgetgate, self.W_cell_to_outgate] # When we aren't precomputing the input outside of scan, we need to # provide the input weights and biases to the step function if not self.precompute_input: non_seqs += [W_in_stacked, b_stacked] if self.unroll_scan: # Retrieve the dimensionality of the incoming layer input_shape = self.input_shapes[0] # Explicitly unroll the recurrence instead of using scan cell_out, hid_out = unroll_scan( fn=step_fun, sequences=sequences, outputs_info=[cell_init, hid_init], go_backwards=self.backwards, non_sequences=non_seqs, n_steps=input_shape[1]) else: # Scan op iterates over first dimension of input and repeatedly # applies the step function cell_out, hid_out = theano.scan( fn=step_fun, sequences=sequences, outputs_info=[cell_init, hid_init], go_backwards=self.backwards, truncate_gradient=self.gradient_steps, non_sequences=non_seqs, strict=True)[0] # When it is requested that we only return the final sequence step, # we need to slice it out immediately after scan is applied if self.only_return_final: hid_out = hid_out[-1] else: # dimshuffle back to (n_batch, n_time_steps, n_features)) hid_out = hid_out.dimshuffle(1, 0, 2) # if scan is backward reverse the output if self.backwards: hid_out = hid_out[:, ::-1] return hid_out class GRULayer(MergeLayer): r""" lasagne.layers.recurrent.GRULayer(incoming, num_units, resetgate=lasagne.layers.Gate(W_cell=None), updategate=lasagne.layers.Gate(W_cell=None), hidden_update=lasagne.layers.Gate( W_cell=None, lasagne.nonlinearities.tanh), hid_init=lasagne.init.Constant(0.), backwards=False, learn_init=False, gradient_steps=-1, grad_clipping=0, unroll_scan=False, precompute_input=True, mask_input=None, only_return_final=False, **kwargs) Gated Recurrent Unit (GRU) Layer Implements the recurrent step proposed in [1]_, which computes the output by .. math :: r_t &= \sigma_r(x_t W_{xr} + h_{t - 1} W_{hr} + b_r)\\ u_t &= \sigma_u(x_t W_{xu} + h_{t - 1} W_{hu} + b_u)\\ c_t &= \sigma_c(x_t W_{xc} + r_t \odot (h_{t - 1} W_{hc}) + b_c)\\ h_t &= (1 - u_t) \odot h_{t - 1} + u_t \odot c_t Parameters ---------- incoming : a :class:`lasagne.layers.Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. num_units : int Number of hidden units in the layer. resetgate : Gate Parameters for the reset gate (:math:`r_t`): :math:`W_{xr}`, :math:`W_{hr}`, :math:`b_r`, and :math:`\sigma_r`. updategate : Gate Parameters for the update gate (:math:`u_t`): :math:`W_{xu}`, :math:`W_{hu}`, :math:`b_u`, and :math:`\sigma_u`. hidden_update : Gate Parameters for the hidden update (:math:`c_t`): :math:`W_{xc}`, :math:`W_{hc}`, :math:`b_c`, and :math:`\sigma_c`. 
hid_init : callable, np.ndarray, theano.shared or :class:`Layer` Initializer for initial hidden state (:math:`h_0`). backwards : bool If True, process the sequence backwards and then reverse the output again such that the output from the layer is always from :math:`x_1` to :math:`x_n`. learn_init : bool If True, initial hidden values are learned. gradient_steps : int Number of timesteps to include in the backpropagated gradient. If -1, backpropagate through the entire sequence. grad_clipping : float If nonzero, the gradient messages are clipped to the given value during the backward pass. See [1]_ (p. 6) for further explanation. unroll_scan : bool If True the recursion is unrolled instead of using scan. For some graphs this gives a significant speed up but it might also consume more memory. When `unroll_scan` is True, backpropagation always includes the full sequence, so `gradient_steps` must be set to -1 and the input sequence length must be known at compile time (i.e., cannot be given as None). precompute_input : bool If True, precompute input_to_hid before iterating through the sequence. This can result in a speedup at the expense of an increase in memory usage. mask_input : :class:`lasagne.layers.Layer` Layer which allows for a sequence mask to be input, for when sequences are of variable length. Default `None`, which means no mask will be supplied (i.e. all sequences are of the same length). only_return_final : bool If True, only return the final sequential output (e.g. for tasks where a single target value for the entire sequence is desired). In this case, Theano makes an optimization which saves memory. References ---------- .. [1] Cho, Kyunghyun, et al: On the properties of neural machine translation: Encoder-decoder approaches. arXiv preprint arXiv:1409.1259 (2014). .. [2] Chung, Junyoung, et al.: Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling. arXiv preprint arXiv:1412.3555 (2014). .. [3] Graves, Alex: "Generating sequences with recurrent neural networks." arXiv preprint arXiv:1308.0850 (2013). Notes ----- An alternate update for the candidate hidden state is proposed in [2]_: .. math:: c_t &= \sigma_c(x_t W_{ic} + (r_t \odot h_{t - 1})W_{hc} + b_c)\\ We use the formulation from [1]_ because it allows us to do all matrix operations in a single dot product. """ def __init__(self, incoming, num_units, resetgate=Gate(W_cell=None), updategate=Gate(W_cell=None), hidden_update=Gate(W_cell=None, nonlinearity=nonlinearities.tanh), hid_init=init.Constant(0.), backwards=False, learn_init=False, gradient_steps=-1, grad_clipping=0, unroll_scan=False, precompute_input=True, mask_input=None, only_return_final=False, **kwargs): # This layer inherits from a MergeLayer, because it can have three # inputs - the layer input, the mask and the initial hidden state. We # will just provide the layer input as incomings, unless a mask input # or initial hidden state was provided. 
incomings = [incoming] self.mask_incoming_index = -1 self.hid_init_incoming_index = -1 if mask_input is not None: incomings.append(mask_input) self.mask_incoming_index = len(incomings)-1 if isinstance(hid_init, Layer): incomings.append(hid_init) self.hid_init_incoming_index = len(incomings)-1 # Initialize parent layer super(GRULayer, self).__init__(incomings, **kwargs) self.learn_init = learn_init self.num_units = num_units self.grad_clipping = grad_clipping self.backwards = backwards self.gradient_steps = gradient_steps self.unroll_scan = unroll_scan self.precompute_input = precompute_input self.only_return_final = only_return_final if unroll_scan and gradient_steps != -1: raise ValueError( "Gradient steps must be -1 when unroll_scan is true.") # Retrieve the dimensionality of the incoming layer input_shape = self.input_shapes[0] if unroll_scan and input_shape[1] is None: raise ValueError("Input sequence length cannot be specified as " "None when unroll_scan is True") # Input dimensionality is the output dimensionality of the input layer num_inputs = np.prod(input_shape[2:]) def add_gate_params(gate, gate_name): """ Convenience function for adding layer parameters from a Gate instance. """ return (self.add_param(gate.W_in, (num_inputs, num_units), name="W_in_to_{}".format(gate_name)), self.add_param(gate.W_hid, (num_units, num_units), name="W_hid_to_{}".format(gate_name)), self.add_param(gate.b, (num_units,), name="b_{}".format(gate_name), regularizable=False), gate.nonlinearity) # Add in all parameters from gates (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate, self.nonlinearity_updategate) = add_gate_params(updategate, 'updategate') (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate, self.nonlinearity_resetgate) = add_gate_params(resetgate, 'resetgate') (self.W_in_to_hidden_update, self.W_hid_to_hidden_update, self.b_hidden_update, self.nonlinearity_hid) = add_gate_params( hidden_update, 'hidden_update') # Initialize hidden state if isinstance(hid_init, Layer): self.hid_init = hid_init else: self.hid_init = self.add_param( hid_init, (1, self.num_units), name="hid_init", trainable=learn_init, regularizable=False) def get_output_shape_for(self, input_shapes): # The shape of the input to this layer will be the first element # of input_shapes, whether or not a mask input is being used. input_shape = input_shapes[0] # When only_return_final is true, the second (sequence step) dimension # will be flattened if self.only_return_final: return input_shape[0], self.num_units # Otherwise, the shape will be (n_batch, n_steps, num_units) else: return input_shape[0], input_shape[1], self.num_units def get_output_for(self, inputs, **kwargs): """ Compute this layer's output function given a symbolic input variable Parameters ---------- inputs : list of theano.TensorType `inputs[0]` should always be the symbolic input variable. When this layer has a mask input (i.e. was instantiated with `mask_input != None`, indicating that the lengths of sequences in each batch vary), `inputs` should have length 2, where `inputs[1]` is the `mask`. The `mask` should be supplied as a Theano variable denoting whether each time step in each sequence in the batch is part of the sequence or not. `mask` should be a matrix of shape ``(n_batch, n_time_steps)`` where ``mask[i, j] = 1`` when ``j <= (length of sequence i)`` and ``mask[i, j] = 0`` when ``j > (length of sequence i)``. When the hidden state of this layer is to be pre-filled (i.e. 
was set to a :class:`Layer` instance) `inputs` should have length at least 2, and `inputs[-1]` is the hidden state to prefill with. Returns ------- layer_output : theano.TensorType Symbolic output variable. """ # Retrieve the layer input input = inputs[0] # Retrieve the mask when it is supplied mask = None hid_init = None if self.mask_incoming_index > 0: mask = inputs[self.mask_incoming_index] if self.hid_init_incoming_index > 0: hid_init = inputs[self.hid_init_incoming_index] # Treat all dimensions after the second as flattened feature dimensions if input.ndim > 3: input = T.flatten(input, 3) # Because scan iterates over the first dimension we dimshuffle to # (n_time_steps, n_batch, n_features) input = input.dimshuffle(1, 0, 2) seq_len, num_batch, _ = input.shape # Stack input weight matrices into a (num_inputs, 3*num_units) # matrix, which speeds up computation W_in_stacked = T.concatenate( [self.W_in_to_resetgate, self.W_in_to_updategate, self.W_in_to_hidden_update], axis=1) # Same for hidden weight matrices W_hid_stacked = T.concatenate( [self.W_hid_to_resetgate, self.W_hid_to_updategate, self.W_hid_to_hidden_update], axis=1) # Stack gate biases into a (3*num_units) vector b_stacked = T.concatenate( [self.b_resetgate, self.b_updategate, self.b_hidden_update], axis=0) if self.precompute_input: # precompute_input inputs*W. W_in is (n_features, 3*num_units). # input is then (n_batch, n_time_steps, 3*num_units). input = T.dot(input, W_in_stacked) + b_stacked # When theano.scan calls step, input_n will be (n_batch, 3*num_units). # We define a slicing function that extract the input to each GRU gate def slice_w(x, n): s = x[:, n*self.num_units:(n+1)*self.num_units] if self.num_units == 1: s = T.addbroadcast(s, 1) # Theano cannot infer this by itself return s # Create single recurrent computation step function # input__n is the n'th vector of the input def step(input_n, hid_previous, *args): # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1} hid_input = T.dot(hid_previous, W_hid_stacked) if self.grad_clipping: input_n = theano.gradient.grad_clip( input_n, -self.grad_clipping, self.grad_clipping) hid_input = theano.gradient.grad_clip( hid_input, -self.grad_clipping, self.grad_clipping) if not self.precompute_input: # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c input_n = T.dot(input_n, W_in_stacked) + b_stacked # Reset and update gates resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0) updategate = slice_w(hid_input, 1) + slice_w(input_n, 1) resetgate = self.nonlinearity_resetgate(resetgate) updategate = self.nonlinearity_updategate(updategate) # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1}) hidden_update_in = slice_w(input_n, 2) hidden_update_hid = slice_w(hid_input, 2) hidden_update = hidden_update_in + resetgate*hidden_update_hid if self.grad_clipping: hidden_update = theano.gradient.grad_clip( hidden_update, -self.grad_clipping, self.grad_clipping) hidden_update = self.nonlinearity_hid(hidden_update) # Compute (1 - u_t)h_{t - 1} + u_t c_t hid = (1 - updategate)*hid_previous + updategate*hidden_update return hid def step_masked(input_n, mask_n, hid_previous, *args): hid = step(input_n, hid_previous, *args) # Skip over any input with mask 0 by copying the previous # hidden state; proceed normally for any input with mask 1. hid = T.switch(mask_n, hid, hid_previous) return hid if mask is not None: # mask is given as (batch_size, seq_len). 
Because scan iterates # over first dimension, we dimshuffle to (seq_len, batch_size) and # add a broadcastable dimension mask = mask.dimshuffle(1, 0, 'x') sequences = [input, mask] step_fun = step_masked else: sequences = [input] step_fun = step if not isinstance(self.hid_init, Layer): # Dot against a 1s vector to repeat to shape (num_batch, num_units) hid_init = T.dot(T.ones((num_batch, 1)), self.hid_init) # The hidden-to-hidden weight matrix is always used in step non_seqs = [W_hid_stacked] # When we aren't precomputing the input outside of scan, we need to # provide the input weights and biases to the step function if not self.precompute_input: non_seqs += [W_in_stacked, b_stacked] if self.unroll_scan: # Retrieve the dimensionality of the incoming layer input_shape = self.input_shapes[0] # Explicitly unroll the recurrence instead of using scan hid_out = unroll_scan( fn=step_fun, sequences=sequences, outputs_info=[hid_init], go_backwards=self.backwards, non_sequences=non_seqs, n_steps=input_shape[1])[0] else: # Scan op iterates over first dimension of input and repeatedly # applies the step function hid_out = theano.scan( fn=step_fun, sequences=sequences, go_backwards=self.backwards, outputs_info=[hid_init], non_sequences=non_seqs, truncate_gradient=self.gradient_steps, strict=True)[0] # When it is requested that we only return the final sequence step, # we need to slice it out immediately after scan is applied if self.only_return_final: hid_out = hid_out[-1] else: # dimshuffle back to (n_batch, n_time_steps, n_features)) hid_out = hid_out.dimshuffle(1, 0, 2) # if scan is backward reverse the output if self.backwards: hid_out = hid_out[:, ::-1] return hid_out Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/local.py0000644000175000017500000002160413307306052025437 0ustar sinclairssinclairsimport theano.tensor as T from .. import init from .. import nonlinearities from .conv import Conv2DLayer __all__ = [ "LocallyConnected2DLayer", ] class LocallyConnected2DLayer(Conv2DLayer): """ lasagne.layers.LocallyConnected2DLayer(incoming, num_filters, filter_size, stride=(1, 1), pad='same', untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=True, channelwise=False, **kwargs) 2D locally connected layer Performs an operation similar to a 2D convolution but without the weight sharing, then optionally adds a bias and applies an elementwise nonlinearity. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 4D tensor, with shape ``(batch_size, num_input_channels, input_rows, input_columns)``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 2-element tuple specifying the size of the filters. stride : int or iterable of int An integer or a 2-element tuple specifying the stride of the convolution operation. This implementation only supports unit stride, the argument is provided for compatibility to convolutional layers only. pad : int, iterable of int, or 'valid' (default: 'same') The amount of implicit zero padding of the input. This implementation only supports 'same' padding, the argument is provided for compatibility to other convolutional layers only. 
untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). If ``True``, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a 3D tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. If ``channelwise`` is set to ``False``, the weights should be a 6D tensor with shape ``(num_filters, num_input_channels, filter_rows, filter_columns, output_rows, output_columns)``. If ``channelwise`` is set to ``True``, the weights should be a 5D tensor with shape ``(num_filters, filter_rows, filter_columns, output_rows, output_columns)``. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untie_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, output_rows, output_columns)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. flip_filters : bool (default: True) Whether to flip the filters before multiplying them with the input, similar to a convolution (this is the default), or not to flip them, similar to a correlation. channelwise : bool (default: False) If ``False``, each filter interacts with all of the input channels as in a convolution. If ``True``, each filter only interacts with the corresponding input channel. That is, each output channel only depends on its filter and on the input channel at the same channel index. In this case, the number of output channels (i.e. number of filters) should be equal to the number of input channels. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. Notes ----- This implementation computes the output tensor by iterating over the filter weights and multiplying them with shifted versions of the input tensor. This implementation assumes no stride, 'same' padding and no dilation. Raises ------ ValueError When ``channelwise`` is set to ``True`` and the number of filters differs from the number of input channels, a `ValueError` is raised. """ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1), pad='same', untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=True, channelwise=False, **kwargs): self.channelwise = channelwise super(LocallyConnected2DLayer, self).__init__( incoming, num_filters, filter_size, stride=stride, pad=pad, untie_biases=untie_biases, W=W, b=b, nonlinearity=nonlinearity, flip_filters=flip_filters, **kwargs) # require no stride if self.stride != (1, 1): raise NotImplementedError( "LocallyConnected2DLayer requires stride=1 / (1, 1), but got " "%r." % (stride,)) # require same convolution if self.pad != 'same': raise NotImplementedError( "LocallyConnected2DLayer requires pad='same', but got %r."
% (pad,)) def get_W_shape(self): if any(s is None for s in self.input_shape[1:]): raise ValueError( "A LocallyConnected2DLayer requires a fixed input shape " "(except for the batch size). Got %r." % (self.input_shape,)) num_input_channels = self.input_shape[1] output_shape = self.get_output_shape_for(self.input_shape) if self.channelwise: if self.channelwise and self.num_filters != num_input_channels: raise ValueError("num_filters and the number of input " "channels should match when channelwise is " "true, but got num_filters=%r and %d input " "channels" % (self.num_filters, num_input_channels)) return (self.num_filters,) + self.filter_size + output_shape[-2:] else: return (self.num_filters, num_input_channels) + \ self.filter_size + output_shape[-2:] def convolve(self, input, **kwargs): output_shape = self.output_shape # start with ii == jj == 0 case to initialize tensor i = self.filter_size[0] // 2 j = self.filter_size[1] // 2 filter_h_ind = -i-1 if self.flip_filters else i filter_w_ind = -j-1 if self.flip_filters else j if self.channelwise: conved = input * self.W[:, filter_h_ind, filter_w_ind, :, :] else: conved = \ (input[:, None, :, :, :] * self.W[:, :, filter_h_ind, filter_w_ind, :, :]).sum(axis=-3) for i in range(self.filter_size[0]): filter_h_ind = -i-1 if self.flip_filters else i ii = i - (self.filter_size[0] // 2) input_h_slice = slice( max(ii, 0), min(ii + output_shape[-2], output_shape[-2])) output_h_slice = slice( max(-ii, 0), min(-ii + output_shape[-2], output_shape[-2])) for j in range(self.filter_size[1]): filter_w_ind = -j-1 if self.flip_filters else j jj = j - (self.filter_size[1] // 2) input_w_slice = slice( max(jj, 0), min(jj + output_shape[-1], output_shape[-1])) output_w_slice = slice( max(-jj, 0), min(-jj + output_shape[-1], output_shape[-1])) # skip this case since it was done at the beginning if ii == jj == 0: continue if self.channelwise: inc = (input[:, :, input_h_slice, input_w_slice] * self.W[:, filter_h_ind, filter_w_ind, output_h_slice, output_w_slice]) else: inc = (input[:, None, :, input_h_slice, input_w_slice] * self.W[:, :, filter_h_ind, filter_w_ind, output_h_slice, output_w_slice]).sum(axis=-3) conved = T.inc_subtensor( conved[:, :, output_h_slice, output_w_slice], inc) return conved Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/base.py0000644000175000017500000003110013307306052025247 0ustar sinclairssinclairsfrom collections import OrderedDict import theano.tensor as T from .. import utils __all__ = [ "Layer", "MergeLayer", ] # Layer base class class Layer(object): """ The :class:`Layer` class represents a single layer of a neural network. It should be subclassed when implementing new types of layers. Because each layer can keep track of the layer(s) feeding into it, a network's output :class:`Layer` instance can double as a handle to the full network. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. name : a string or None An optional name to attach to this layer. """ def __init__(self, incoming, name=None): if isinstance(incoming, tuple): self.input_shape = incoming self.input_layer = None else: self.input_shape = incoming.output_shape self.input_layer = incoming self.name = name self.params = OrderedDict() self.get_output_kwargs = [] if any(d is not None and d <= 0 for d in self.input_shape): raise ValueError(( "Cannot create Layer with a non-positive input_shape " "dimension. 
input_shape=%r, self.name=%r") % ( self.input_shape, self.name)) @property def output_shape(self): shape = self.get_output_shape_for(self.input_shape) if any(isinstance(s, T.Variable) for s in shape): raise ValueError("%s returned a symbolic output shape from its " "get_output_shape_for() method: %r. This is not " "allowed; shapes must be tuples of integers for " "fixed-size dimensions and Nones for variable " "dimensions." % (self.__class__.__name__, shape)) return shape def get_params(self, unwrap_shared=True, **tags): """ Returns a list of Theano shared variables or expressions that parameterize the layer. By default, all shared variables that participate in the forward pass will be returned (in the order they were registered in the Layer's constructor via :meth:`add_param()`). The list can optionally be filtered by specifying tags as keyword arguments. For example, ``trainable=True`` will only return trainable parameters, and ``regularizable=True`` will only return parameters that can be regularized (e.g., by L2 decay). If any of the layer's parameters was set to a Theano expression instead of a shared variable, `unwrap_shared` controls whether to return the shared variables involved in that expression (``unwrap_shared=True``, the default), or the expression itself (``unwrap_shared=False``). In either case, tag filtering applies to the expressions, considering all variables within an expression to be tagged the same. Parameters ---------- unwrap_shared : bool (default: True) Affects only parameters that were set to a Theano expression. If ``True`` the function returns the shared variables contained in the expression, otherwise the Theano expression itself. **tags (optional) tags can be specified to filter the list. Specifying ``tag1=True`` will limit the list to parameters that are tagged with ``tag1``. Specifying ``tag1=False`` will limit the list to parameters that are not tagged with ``tag1``. Commonly used tags are ``regularizable`` and ``trainable``. Returns ------- list of Theano shared variables or expressions A list of variables that parameterize the layer Notes ----- For layers without any parameters, this will return an empty list. """ result = list(self.params.keys()) only = set(tag for tag, value in tags.items() if value) if only: # retain all parameters that have all of the tags in `only` result = [param for param in result if not (only - self.params[param])] exclude = set(tag for tag, value in tags.items() if not value) if exclude: # retain all parameters that have none of the tags in `exclude` result = [param for param in result if not (self.params[param] & exclude)] if unwrap_shared: return utils.collect_shared_vars(result) else: return result def get_output_shape_for(self, input_shape): """ Computes the output shape of this layer, given an input shape. Parameters ---------- input_shape : tuple A tuple representing the shape of the input. The tuple should have as many elements as there are input dimensions, and the elements should be integers or `None`. Returns ------- tuple A tuple representing the shape of the output of this layer. The tuple has as many elements as there are output dimensions, and the elements are all either integers or `None`. Notes ----- This method will typically be overridden when implementing a new :class:`Layer` class. By default it simply returns the input shape. This means that a layer that does not modify the shape (e.g. because it applies an elementwise operation) does not need to override this method. 
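        Examples
        --------
        A minimal sketch (the ``EverySecondColumnLayer`` below is
        hypothetical and not part of Lasagne) of a layer that keeps every
        second column of a 2D input and therefore must override this
        method:

        >>> class EverySecondColumnLayer(Layer):  # doctest: +SKIP
        ...     def get_output_shape_for(self, input_shape):
        ...         # halve the second dimension, keeping None for unknown
        ...         cols = input_shape[1]
        ...         return (input_shape[0],
        ...                 None if cols is None else cols // 2)
        ...     def get_output_for(self, input, **kwargs):
        ...         # columns 1, 3, 5, ...: n // 2 columns for n columns
        ...         return input[:, 1::2]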
""" return input_shape def get_output_for(self, input, **kwargs): """ Propagates the given input through this layer (and only this layer). Parameters ---------- input : Theano expression The expression to propagate through this layer. Returns ------- output : Theano expression The output of this layer given the input to this layer. Notes ----- This is called by the base :meth:`lasagne.layers.get_output()` to propagate data through a network. This method should be overridden when implementing a new :class:`Layer` class. By default it raises `NotImplementedError`. """ raise NotImplementedError def add_param(self, spec, shape, name=None, **tags): """ Register and possibly initialize a parameter tensor for the layer. When defining a layer class, this method is called in the constructor to define which parameters the layer has, what their shapes are, how they should be initialized and what tags are associated with them. This allows layer classes to transparently support parameter initialization from numpy arrays and callables, as well as setting parameters to existing Theano shared variables or Theano expressions. All registered parameters are stored along with their tags in the ordered dictionary :attr:`Layer.params`, and can be retrieved with :meth:`Layer.get_params()`, optionally filtered by their tags. Parameters ---------- spec : Theano shared variable, expression, numpy array or callable initial value, expression or initializer for this parameter. See :func:`lasagne.utils.create_param` for more information. shape : tuple of int a tuple of integers representing the desired shape of the parameter tensor. name : str (optional) a descriptive name for the parameter variable. This will be passed to ``theano.shared`` when the variable is created, prefixed by the layer's name if any (in the form ``'layer_name.param_name'``). If ``spec`` is already a shared variable or expression, this parameter will be ignored to avoid overwriting an existing name. **tags (optional) tags associated with the parameter can be specified as keyword arguments. To associate the tag ``tag1`` with the parameter, pass ``tag1=True``. By default, the tags ``regularizable`` and ``trainable`` are associated with the parameter. Pass ``regularizable=False`` or ``trainable=False`` respectively to prevent this. Returns ------- Theano shared variable or Theano expression the resulting parameter variable or parameter expression Notes ----- It is recommended to assign the resulting parameter variable/expression to an attribute of the layer for easy access, for example: >>> self.W = self.add_param(W, (2, 3), name='W') #doctest: +SKIP """ # prefix the param name with the layer name if it exists if name is not None: if self.name is not None: name = "%s.%s" % (self.name, name) # create shared variable, or pass through given variable/expression param = utils.create_param(spec, shape, name) # parameters should be trainable and regularizable by default tags['trainable'] = tags.get('trainable', True) tags['regularizable'] = tags.get('regularizable', True) self.params[param] = set(tag for tag, value in tags.items() if value) return param class MergeLayer(Layer): """ This class represents a layer that aggregates input from multiple layers. It should be subclassed when implementing new types of layers that obtain their input from multiple layers. Parameters ---------- incomings : a list of :class:`Layer` instances or tuples The layers feeding into this layer, or expected input shapes. 
name : a string or None An optional name to attach to this layer. """ def __init__(self, incomings, name=None): self.input_shapes = [incoming if isinstance(incoming, tuple) else incoming.output_shape for incoming in incomings] self.input_layers = [None if isinstance(incoming, tuple) else incoming for incoming in incomings] self.name = name self.params = OrderedDict() self.get_output_kwargs = [] @Layer.output_shape.getter def output_shape(self): shape = self.get_output_shape_for(self.input_shapes) if any(isinstance(s, T.Variable) for s in shape): raise ValueError("%s returned a symbolic output shape from its " "get_output_shape_for() method: %r. This is not " "allowed; shapes must be tuples of integers for " "fixed-size dimensions and Nones for variable " "dimensions." % (self.__class__.__name__, shape)) return shape def get_output_shape_for(self, input_shapes): """ Computes the output shape of this layer, given a list of input shapes. Parameters ---------- input_shape : list of tuple A list of tuples, with each tuple representing the shape of one of the inputs (in the correct order). These tuples should have as many elements as there are input dimensions, and the elements should be integers or `None`. Returns ------- tuple A tuple representing the shape of the output of this layer. The tuple has as many elements as there are output dimensions, and the elements are all either integers or `None`. Notes ----- This method must be overridden when implementing a new :class:`Layer` class with multiple inputs. By default it raises `NotImplementedError`. """ raise NotImplementedError def get_output_for(self, inputs, **kwargs): """ Propagates the given inputs through this layer (and only this layer). Parameters ---------- inputs : list of Theano expressions The Theano expressions to propagate through this layer. Returns ------- Theano expressions The output of this layer given the inputs to this layer. Notes ----- This is called by the base :meth:`lasagne.layers.get_output()` to propagate data through a network. This method should be overridden when implementing a new :class:`Layer` class with multiple inputs. By default it raises `NotImplementedError`. """ raise NotImplementedError Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/conv.py0000644000175000017500000016360513307306052025322 0ustar sinclairssinclairsimport theano.tensor as T from .. import init from .. import nonlinearities from ..utils import as_tuple, int_types, inspect_kwargs from ..theano_extensions import conv from .base import Layer __all__ = [ "Conv1DLayer", "Conv2DLayer", "Conv3DLayer", "TransposedConv2DLayer", "Deconv2DLayer", "DilatedConv2DLayer", "TransposedConv3DLayer", "Deconv3DLayer", ] def conv_output_length(input_length, filter_size, stride, pad=0): """Helper function to compute the output size of a convolution operation This function computes the length along a single axis, which corresponds to a 1D convolution. It can also be used for convolutions with higher dimensionalities by using it individually for each axis. Parameters ---------- input_length : int or None The size of the input. filter_size : int The size of the filter. stride : int The stride of the convolution operation. pad : int, 'full' or 'same' (default: 0) By default, the convolution is only computed where the input and the filter fully overlap (a valid convolution). When ``stride=1``, this yields an output that is smaller than the input by ``filter_size - 1``. 
The `pad` argument allows you to implicitly pad the input with zeros, extending the output size. A single integer results in symmetric zero-padding of the given size on both borders. ``'full'`` pads with one less than the filter size on both sides. This is equivalent to computing the convolution wherever the input and the filter overlap by at least one position. ``'same'`` pads with half the filter size on both sides (one less on the second side for an even filter size). When ``stride=1``, this results in an output size equal to the input size. Returns ------- int or None The output size corresponding to the given convolution parameters, or ``None`` if `input_size` is ``None``. Raises ------ ValueError When an invalid padding is specified, a `ValueError` is raised. """ if input_length is None: return None if pad == 'valid': output_length = input_length - filter_size + 1 elif pad == 'full': output_length = input_length + filter_size - 1 elif pad == 'same': output_length = input_length elif isinstance(pad, int_types): output_length = input_length + 2 * pad - filter_size + 1 else: raise ValueError('Invalid pad: {0}'.format(pad)) # This is the integer arithmetic equivalent to # np.ceil(output_length / stride) output_length = (output_length + stride - 1) // stride return output_length def conv_input_length(output_length, filter_size, stride, pad=0): """Helper function to compute the input size of a convolution operation This function computes the length along a single axis, which corresponds to a 1D convolution. It can also be used for convolutions with higher dimensionalities by using it individually for each axis. Parameters ---------- output_length : int or None The size of the output. filter_size : int The size of the filter. stride : int The stride of the convolution operation. pad : int, 'full' or 'same' (default: 0) By default, the convolution is only computed where the input and the filter fully overlap (a valid convolution). When ``stride=1``, this yields an output that is smaller than the input by ``filter_size - 1``. The `pad` argument allows you to implicitly pad the input with zeros, extending the output size. A single integer results in symmetric zero-padding of the given size on both borders. ``'full'`` pads with one less than the filter size on both sides. This is equivalent to computing the convolution wherever the input and the filter overlap by at least one position. ``'same'`` pads with half the filter size on both sides (one less on the second side for an even filter size). When ``stride=1``, this results in an output size equal to the input size. Returns ------- int or None The smallest input size corresponding to the given convolution parameters for the given output size, or ``None`` if `output_size` is ``None``. For a strided convolution, any input size of up to ``stride - 1`` elements larger than returned will still give the same output size. Raises ------ ValueError When an invalid padding is specified, a `ValueError` is raised. Notes ----- This can be used to compute the output size of a convolution backward pass, also called transposed convolution, fractionally-strided convolution or (wrongly) deconvolution in the literature. 
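    Examples
    --------
    A small illustration (the numbers are arbitrary) of how this function
    inverts :func:`conv_output_length` for a strided valid convolution:

    >>> from lasagne.layers.conv import conv_output_length, conv_input_length
    >>> conv_output_length(10, filter_size=3, stride=2, pad='valid')
    4
    >>> conv_input_length(4, filter_size=3, stride=2, pad='valid')
    9

    Input lengths of 9 and 10 both yield an output length of 4 here, and
    the smaller of the two is returned.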
""" if output_length is None: return None if pad == 'valid': pad = 0 elif pad == 'full': pad = filter_size - 1 elif pad == 'same': pad = filter_size // 2 if not isinstance(pad, int_types): raise ValueError('Invalid pad: {0}'.format(pad)) return (output_length - 1) * stride - 2 * pad + filter_size class BaseConvLayer(Layer): """ lasagne.layers.BaseConvLayer(incoming, num_filters, filter_size, stride=1, pad=0, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=True, n=None, **kwargs) Convolutional layer base class Base class for performing an `n`-dimensional convolution on its input, optionally adding a bias and applying an elementwise nonlinearity. Note that this class cannot be used in a Lasagne network, only its subclasses can (e.g., :class:`Conv1DLayer`, :class:`Conv2DLayer`). Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. Must be a tensor of 2+`n` dimensions: ``(batch_size, num_input_channels, )``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or an `n`-element tuple specifying the size of the filters. stride : int or iterable of int An integer or an `n`-element tuple specifying the stride of the convolution operation. pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0) By default, the convolution is only computed where the input and the filter fully overlap (a valid convolution). When ``stride=1``, this yields an output that is smaller than the input by ``filter_size - 1``. The `pad` argument allows you to implicitly pad the input with zeros, extending the output size. A single integer results in symmetric zero-padding of the given size on all borders, a tuple of `n` integers allows different symmetric padding per dimension. ``'full'`` pads with one less than the filter size on both sides. This is equivalent to computing the convolution wherever the input and the filter overlap by at least one position. ``'same'`` pads with half the filter size (rounded down) on both sides. When ``stride=1`` this results in an output size equal to the input size. Even filter size is not supported. ``'valid'`` is an alias for ``0`` (no padding / a valid convolution). Note that ``'full'`` and ``'same'`` can be faster than equivalent integer values due to optimizations by Theano. untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). If ``True``, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be an `n`-dimensional tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a tensor of 2+`n` dimensions with shape ``(num_filters, num_input_channels, )``. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, )`` instead. See :func:`lasagne.utils.create_param` for more information. 
nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. flip_filters : bool (default: True) Whether to flip the filters before sliding them over the input, performing a convolution (this is the default), or not to flip them and perform a correlation. Note that for some other convolutional layers in Lasagne, flipping incurs an overhead and is disabled by default -- check the documentation when using learned weights from another layer. num_groups : int (default: 1) The number of groups to split the input channels and output channels into, such that data does not cross the group boundaries. Requires the number of channels to be divisible by the number of groups, and requires Theano 0.10 or later for more than one group. n : int or None The dimensionality of the convolution (i.e., the number of spatial dimensions of each feature map and each convolutional filter). If ``None``, will be inferred from the input shape. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. """ def __init__(self, incoming, num_filters, filter_size, stride=1, pad=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=True, num_groups=1, n=None, **kwargs): super(BaseConvLayer, self).__init__(incoming, **kwargs) if nonlinearity is None: self.nonlinearity = nonlinearities.identity else: self.nonlinearity = nonlinearity if n is None: n = len(self.input_shape) - 2 elif n != len(self.input_shape) - 2: raise ValueError("Tried to create a %dD convolution layer with " "input shape %r. Expected %d input dimensions " "(batchsize, channels, %d spatial dimensions)." % (n, self.input_shape, n+2, n)) self.n = n self.num_filters = num_filters self.filter_size = as_tuple(filter_size, n, int_types) self.flip_filters = flip_filters self.stride = as_tuple(stride, n, int_types) self.untie_biases = untie_biases if pad == 'same': if any(s % 2 == 0 for s in self.filter_size): raise NotImplementedError( '`same` padding requires odd filter size.') if pad == 'valid': self.pad = as_tuple(0, n) elif pad in ('full', 'same'): self.pad = pad else: self.pad = as_tuple(pad, n, int_types) if (num_groups <= 0 or self.num_filters % num_groups != 0 or self.input_shape[1] % num_groups != 0): raise ValueError( "num_groups (here: %d) must be positive and evenly divide the " "number of input and output channels (here: %d and %d)" % (num_groups, self.input_shape[1], self.num_filters)) elif (num_groups > 1 and "num_groups" not in inspect_kwargs(T.nnet.conv2d)): raise RuntimeError("num_groups > 1 requires " "Theano 0.10 or later") # pragma: no cover self.num_groups = num_groups self.W = self.add_param(W, self.get_W_shape(), name="W") if b is None: self.b = None else: if self.untie_biases: biases_shape = (num_filters,) + self.output_shape[2:] else: biases_shape = (num_filters,) self.b = self.add_param(b, biases_shape, name="b", regularizable=False) def get_W_shape(self): """Get the shape of the weight matrix `W`. Returns ------- tuple of int The shape of the weight matrix. 
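        Examples
        --------
        A short illustration (shapes chosen arbitrarily); for a 2D
        convolution with the default ``num_groups=1`` this is
        ``(num_filters, num_input_channels) + filter_size``:

        >>> from lasagne.layers import Conv2DLayer
        >>> Conv2DLayer((None, 3, 32, 32), num_filters=16,
        ...             filter_size=(5, 5)).get_W_shape()
        (16, 3, 5, 5)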
""" num_input_channels = self.input_shape[1] // self.num_groups return (self.num_filters, num_input_channels) + self.filter_size def get_output_shape_for(self, input_shape): pad = self.pad if isinstance(self.pad, tuple) else (self.pad,) * self.n batchsize = input_shape[0] return ((batchsize, self.num_filters) + tuple(conv_output_length(input, filter, stride, p) for input, filter, stride, p in zip(input_shape[2:], self.filter_size, self.stride, pad))) def get_output_for(self, input, **kwargs): conved = self.convolve(input, **kwargs) if self.b is None: activation = conved elif self.untie_biases: activation = conved + T.shape_padleft(self.b, 1) else: activation = conved + self.b.dimshuffle(('x', 0) + ('x',) * self.n) return self.nonlinearity(activation) def convolve(self, input, **kwargs): """ Symbolically convolves `input` with ``self.W``, producing an output of shape ``self.output_shape``. To be implemented by subclasses. Parameters ---------- input : Theano tensor The input minibatch to convolve **kwargs Any additional keyword arguments from :meth:`get_output_for` Returns ------- Theano tensor `input` convolved according to the configuration of this layer, without any bias or nonlinearity applied. """ raise NotImplementedError("BaseConvLayer does not implement the " "convolve() method. You will want to " "use a subclass such as Conv2DLayer.") class Conv1DLayer(BaseConvLayer): """ lasagne.layers.Conv1DLayer(incoming, num_filters, filter_size, stride=1, pad=0, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=True, convolution=lasagne.theano_extensions.conv.conv1d_mc0, **kwargs) 1D convolutional layer Performs a 1D convolution on its input and optionally adds a bias and applies an elementwise nonlinearity. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 3D tensor, with shape ``(batch_size, num_input_channels, input_length)``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 1-element tuple specifying the size of the filters. stride : int or iterable of int An integer or a 1-element tuple specifying the stride of the convolution operation. pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0) By default, the convolution is only computed where the input and the filter fully overlap (a valid convolution). When ``stride=1``, this yields an output that is smaller than the input by ``filter_size - 1``. The `pad` argument allows you to implicitly pad the input with zeros, extending the output size. An integer or a 1-element tuple results in symmetric zero-padding of the given size on both borders. ``'full'`` pads with one less than the filter size on both sides. This is equivalent to computing the convolution wherever the input and the filter overlap by at least one position. ``'same'`` pads with half the filter size (rounded down) on both sides. When ``stride=1`` this results in an output size equal to the input size. Even filter size is not supported. ``'valid'`` is an alias for ``0`` (no padding / a valid convolution). untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). 
If True, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a matrix (2D). W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a 3D tensor with shape ``(num_filters, num_input_channels, filter_length)``. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, input_length)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. flip_filters : bool (default: True) Whether to flip the filters before sliding them over the input, performing a convolution (this is the default), or not to flip them and perform a correlation. Note that for some other convolutional layers in Lasagne, flipping incurs an overhead and is disabled by default -- check the documentation when using learned weights from another layer. num_groups : int (default: 1) The number of groups to split the input channels and output channels into, such that data does not cross the group boundaries. Requires the number of channels to be divisible by the number of groups, and requires Theano 0.10 or later for more than one group. convolution : callable The convolution implementation to use. The `lasagne.theano_extensions.conv` module provides some alternative implementations for 1D convolutions, because the Theano API only features a 2D convolution implementation. Usually it should be fine to leave this at the default value. Note that not all implementations support all settings for `pad`, `subsample` and `num_groups`. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. 
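    Examples
    --------
    A minimal usage sketch (the input shape is illustrative only):

    >>> from lasagne.layers import InputLayer, Conv1DLayer
    >>> l_in = InputLayer((None, 8, 100))  # (batch, channels, length)
    >>> l_conv = Conv1DLayer(l_in, num_filters=16, filter_size=5)
    >>> l_conv.output_shape
    (None, 16, 96)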
""" def __init__(self, incoming, num_filters, filter_size, stride=1, pad=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=True, convolution=conv.conv1d_mc0, **kwargs): super(Conv1DLayer, self).__init__(incoming, num_filters, filter_size, stride, pad, untie_biases, W, b, nonlinearity, flip_filters, n=1, **kwargs) self.convolution = convolution def convolve(self, input, **kwargs): border_mode = 'half' if self.pad == 'same' else self.pad extra_kwargs = {} if self.num_groups > 1: # pragma: no cover extra_kwargs['num_groups'] = self.num_groups conved = self.convolution(input, self.W, self.input_shape, self.get_W_shape(), subsample=self.stride, border_mode=border_mode, filter_flip=self.flip_filters, **extra_kwargs) return conved class Conv2DLayer(BaseConvLayer): """ lasagne.layers.Conv2DLayer(incoming, num_filters, filter_size, stride=(1, 1), pad=0, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=True, convolution=theano.tensor.nnet.conv2d, **kwargs) 2D convolutional layer Performs a 2D convolution on its input and optionally adds a bias and applies an elementwise nonlinearity. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 4D tensor, with shape ``(batch_size, num_input_channels, input_rows, input_columns)``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 2-element tuple specifying the size of the filters. stride : int or iterable of int An integer or a 2-element tuple specifying the stride of the convolution operation. pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0) By default, the convolution is only computed where the input and the filter fully overlap (a valid convolution). When ``stride=1``, this yields an output that is smaller than the input by ``filter_size - 1``. The `pad` argument allows you to implicitly pad the input with zeros, extending the output size. A single integer results in symmetric zero-padding of the given size on all borders, a tuple of two integers allows different symmetric padding per dimension. ``'full'`` pads with one less than the filter size on both sides. This is equivalent to computing the convolution wherever the input and the filter overlap by at least one position. ``'same'`` pads with half the filter size (rounded down) on both sides. When ``stride=1`` this results in an output size equal to the input size. Even filter size is not supported. ``'valid'`` is an alias for ``0`` (no padding / a valid convolution). Note that ``'full'`` and ``'same'`` can be faster than equivalent integer values due to optimizations by Theano. untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). If True, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a 3D tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a 4D tensor with shape ``(num_filters, num_input_channels, filter_rows, filter_columns)``. See :func:`lasagne.utils.create_param` for more information. 
b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, output_rows, output_columns)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. flip_filters : bool (default: True) Whether to flip the filters before sliding them over the input, performing a convolution (this is the default), or not to flip them and perform a correlation. Note that for some other convolutional layers in Lasagne, flipping incurs an overhead and is disabled by default -- check the documentation when using learned weights from another layer. num_groups : int (default: 1) The number of groups to split the input channels and output channels into, such that data does not cross the group boundaries. Requires the number of channels to be divisible by the number of groups, and requires Theano 0.10 or later for more than one group. convolution : callable The convolution implementation to use. Usually it should be fine to leave this at the default value. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. """ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1), pad=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=True, convolution=T.nnet.conv2d, **kwargs): super(Conv2DLayer, self).__init__(incoming, num_filters, filter_size, stride, pad, untie_biases, W, b, nonlinearity, flip_filters, n=2, **kwargs) self.convolution = convolution def convolve(self, input, **kwargs): border_mode = 'half' if self.pad == 'same' else self.pad extra_kwargs = {} if self.num_groups > 1: # pragma: no cover extra_kwargs['num_groups'] = self.num_groups conved = self.convolution(input, self.W, self.input_shape, self.get_W_shape(), subsample=self.stride, border_mode=border_mode, filter_flip=self.flip_filters, **extra_kwargs) return conved class Conv3DLayer(BaseConvLayer): # pragma: no cover """ lasagne.layers.Conv3DLayer(incoming, num_filters, filter_size, stride=(1, 1, 1), pad=0, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=True, convolution=theano.tensor.nnet.conv3d, **kwargs) 3D convolutional layer Performs a 3D convolution on its input and optionally adds a bias and applies an elementwise nonlinearity. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 5D tensor, with shape ``(batch_size, num_input_channels, input_depth, input_rows, input_columns)``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 3-element tuple specifying the size of the filters. stride : int or iterable of int An integer or a 3-element tuple specifying the stride of the convolution operation. 
pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0) By default, the convolution is only computed where the input and the filter fully overlap (a valid convolution). When ``stride=1``, this yields an output that is smaller than the input by ``filter_size - 1``. The `pad` argument allows you to implicitly pad the input with zeros, extending the output size. A single integer results in symmetric zero-padding of the given size on all borders, a tuple of two integers allows different symmetric padding per dimension. ``'full'`` pads with one less than the filter size on both sides. This is equivalent to computing the convolution wherever the input and the filter overlap by at least one position. ``'same'`` pads with half the filter size (rounded down) on both sides. When ``stride=1`` this results in an output size equal to the input size. Even filter size is not supported. ``'valid'`` is an alias for ``0`` (no padding / a valid convolution). Note that ``'full'`` and ``'same'`` can be faster than equivalent integer values due to optimizations by Theano. untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). If True, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a 4D tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a 5D tensor with shape ``(num_filters, num_input_channels, filter_depth, filter_rows, filter_columns)``. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, output_depth, output_rows, output_columns)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. flip_filters : bool (default: True) Whether to flip the filters before sliding them over the input, performing a convolution (this is the default), or not to flip them and perform a correlation. Note that for some other convolutional layers in Lasagne, flipping incurs an overhead and is disabled by default -- check the documentation when using learned weights from another layer. num_groups : int (default: 1) The number of groups to split the input channels and output channels into, such that data does not cross the group boundaries. Requires the number of channels to be divisible by the number of groups, and requires Theano 0.10 or later for more than one group. convolution : callable The convolution implementation to use. Usually it should be fine to leave this at the default value. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. 
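    Examples
    --------
    A minimal usage sketch (shapes are illustrative; requires a Theano
    version that provides ``theano.tensor.nnet.conv3d``):

    >>> from lasagne.layers import InputLayer, Conv3DLayer
    >>> l_in = InputLayer((None, 1, 16, 64, 64))
    >>> l_conv = Conv3DLayer(l_in, num_filters=8, filter_size=(3, 3, 3),
    ...                      pad='same')
    >>> l_conv.output_shape
    (None, 8, 16, 64, 64)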
""" def __init__(self, incoming, num_filters, filter_size, stride=(1, 1, 1), pad=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=True, convolution=None, **kwargs): super(Conv3DLayer, self).__init__(incoming, num_filters, filter_size, stride, pad, untie_biases, W, b, nonlinearity, flip_filters, n=3, **kwargs) if convolution is None: convolution = T.nnet.conv3d self.convolution = convolution def convolve(self, input, **kwargs): border_mode = 'half' if self.pad == 'same' else self.pad extra_kwargs = {} if self.num_groups > 1: # pragma: no cover extra_kwargs['num_groups'] = self.num_groups conved = self.convolution(input, self.W, self.input_shape, self.get_W_shape(), subsample=self.stride, border_mode=border_mode, filter_flip=self.flip_filters, **extra_kwargs) return conved if not hasattr(T.nnet, 'conv3d'): # pragma: no cover # Hide Conv3DLayer for old Theano versions del Conv3DLayer __all__.remove('Conv3DLayer') class TransposedConv2DLayer(BaseConvLayer): """ lasagne.layers.TransposedConv2DLayer(incoming, num_filters, filter_size, stride=(1, 1), crop=0, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False, **kwargs) 2D transposed convolution layer Performs the backward pass of a 2D convolution (also called transposed convolution, fractionally-strided convolution or deconvolution in the literature) on its input and optionally adds a bias and applies an elementwise nonlinearity. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 4D tensor, with shape ``(batch_size, num_input_channels, input_rows, input_columns)``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 2-element tuple specifying the size of the filters. stride : int or iterable of int An integer or a 2-element tuple specifying the stride of the transposed convolution operation. For the transposed convolution, this gives the dilation factor for the input -- increasing it increases the output size. crop : int, iterable of int, 'full', 'same' or 'valid' (default: 0) By default, the transposed convolution is computed where the input and the filter overlap by at least one position (a full convolution). When ``stride=1``, this yields an output that is larger than the input by ``filter_size - 1``. It can be thought of as a valid convolution padded with zeros. The `crop` argument allows you to decrease the amount of this zero-padding, reducing the output size. It is the counterpart to the `pad` argument in a non-transposed convolution. A single integer results in symmetric cropping of the given size on all borders, a tuple of two integers allows different symmetric cropping per dimension. ``'full'`` disables zero-padding. It is is equivalent to computing the convolution wherever the input and the filter fully overlap. ``'same'`` pads with half the filter size (rounded down) on both sides. When ``stride=1`` this results in an output size equal to the input size. Even filter size is not supported. ``'valid'`` is an alias for ``0`` (no cropping / a full convolution). Note that ``'full'`` and ``'same'`` can be faster than equivalent integer values due to optimizations by Theano. 
untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). If True, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a 3D tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a 4D tensor with shape ``(num_input_channels, num_filters, filter_rows, filter_columns)``. Note that the first two dimensions are swapped compared to a non-transposed convolution. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, output_rows, output_columns)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. flip_filters : bool (default: False) Whether to flip the filters before sliding them over the input, performing a convolution, or not to flip them and perform a correlation (this is the default). Note that this flag is inverted compared to a non-transposed convolution. output_size : int or iterable of int or symbolic tuple of ints The output size of the transposed convolution. Allows to specify which of the possible output shapes to return when stride > 1. If not specified, the smallest shape will be returned. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. Notes ----- The transposed convolution is implemented as the backward pass of a corresponding non-transposed convolution. It can be thought of as dilating the input (by adding ``stride - 1`` zeros between adjacent input elements), padding it with ``filter_size - 1 - crop`` zeros, and cross-correlating it with the filters. See [1]_ for more background. Examples -------- To transpose an existing convolution, with tied filter weights: >>> from lasagne.layers import Conv2DLayer, TransposedConv2DLayer >>> conv = Conv2DLayer((None, 1, 32, 32), 16, 3, stride=2, pad=2) >>> deconv = TransposedConv2DLayer(conv, conv.input_shape[1], ... conv.filter_size, stride=conv.stride, crop=conv.pad, ... W=conv.W, flip_filters=not conv.flip_filters) References ---------- .. [1] Vincent Dumoulin, Francesco Visin (2016): A guide to convolution arithmetic for deep learning. arXiv. 
http://arxiv.org/abs/1603.07285, https://github.com/vdumoulin/conv_arithmetic """ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1), crop=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=False, output_size=None, **kwargs): # output_size must be set before calling the super constructor if (not isinstance(output_size, T.Variable) and output_size is not None): output_size = as_tuple(output_size, 2, int_types) self.output_size = output_size super(TransposedConv2DLayer, self).__init__( incoming, num_filters, filter_size, stride, crop, untie_biases, W, b, nonlinearity, flip_filters, n=2, **kwargs) # rename self.pad to self.crop: self.crop = self.pad del self.pad def get_W_shape(self): num_input_channels = self.input_shape[1] # first two sizes are swapped compared to a forward convolution return (num_input_channels, self.num_filters) + self.filter_size def get_output_shape_for(self, input_shape): if self.output_size is not None: size = self.output_size if isinstance(self.output_size, T.Variable): size = (None, None) return input_shape[0], self.num_filters, size[0], size[1] # If self.output_size is not specified, return the smallest shape # when called from the constructor, self.crop is still called self.pad: crop = getattr(self, 'crop', getattr(self, 'pad', None)) crop = crop if isinstance(crop, tuple) else (crop,) * self.n batchsize = input_shape[0] return ((batchsize, self.num_filters) + tuple(conv_input_length(input, filter, stride, p) for input, filter, stride, p in zip(input_shape[2:], self.filter_size, self.stride, crop))) def convolve(self, input, **kwargs): border_mode = 'half' if self.crop == 'same' else self.crop op = T.nnet.abstract_conv.AbstractConv2d_gradInputs( imshp=self.output_shape, kshp=self.get_W_shape(), subsample=self.stride, border_mode=border_mode, filter_flip=not self.flip_filters) output_size = self.output_shape[2:] if isinstance(self.output_size, T.Variable): output_size = self.output_size elif any(s is None for s in output_size): output_size = self.get_output_shape_for(input.shape)[2:] conved = op(self.W, input, output_size) return conved Deconv2DLayer = TransposedConv2DLayer class TransposedConv3DLayer(BaseConvLayer): # pragma: no cover """ lasagne.layers.TransposedConv3DLayer(incoming, num_filters, filter_size, stride=(1, 1, 1), crop=0, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False, **kwargs) 3D transposed convolution layer Performs the backward pass of a 3D convolution (also called transposed convolution, fractionally-strided convolution or deconvolution in the literature) on its input and optionally adds a bias and applies an elementwise nonlinearity. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 5D tensor, with shape ``(batch_size, num_input_channels, input_depth, input_rows, input_columns)``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 3-element tuple specifying the size of the filters. stride : int or iterable of int An integer or a 3-element tuple specifying the stride of the transposed convolution operation. For the transposed convolution, this gives the dilation factor for the input -- increasing it increases the output size. 
crop : int, iterable of int, 'full', 'same' or 'valid' (default: 0) By default, the transposed convolution is computed where the input and the filter overlap by at least one position (a full convolution). When ``stride=1``, this yields an output that is larger than the input by ``filter_size - 1``. It can be thought of as a valid convolution padded with zeros. The `crop` argument allows you to decrease the amount of this zero-padding, reducing the output size. It is the counterpart to the `pad` argument in a non-transposed convolution. A single integer results in symmetric cropping of the given size on all borders, a tuple of three integers allows different symmetric cropping per dimension. ``'full'`` disables zero-padding. It is equivalent to computing the convolution wherever the input and the filter fully overlap. ``'same'`` pads with half the filter size (rounded down) on both sides. When ``stride=1`` this results in an output size equal to the input size. Even filter size is not supported. ``'valid'`` is an alias for ``0`` (no cropping / a full convolution). Note that ``'full'`` and ``'same'`` can be faster than equivalent integer values due to optimizations by Theano. untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). If ``True``, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a 4D tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a 5D tensor with shape ``(num_input_channels, num_filters, filter_depth, filter_rows, filter_columns)``. Note that the first two dimensions are swapped compared to a non-transposed convolution. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untie_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, output_depth, output_rows, output_columns)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. flip_filters : bool (default: False) Whether to flip the filters before sliding them over the input, performing a convolution, or not to flip them and perform a correlation (this is the default). Note that this flag is inverted compared to a non-transposed convolution. output_size : int or iterable of int or symbolic tuple of ints The output size of the transposed convolution. Allows specifying which of the possible output shapes to return when stride > 1. If not specified, the smallest shape will be returned. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. Notes ----- The transposed convolution is implemented as the backward pass of a corresponding non-transposed convolution.
It can be thought of as dilating the input (by adding ``stride - 1`` zeros between adjacent input elements), padding it with ``filter_size - 1 - crop`` zeros, and cross-correlating it with the filters. See [1]_ for more background. Examples -------- To transpose an existing convolution, with tied filter weights: >>> from lasagne.layers import Conv3DLayer, TransposedConv3DLayer >>> conv = Conv3DLayer((None, 1, 32, 32, 32), 16, 3, stride=2, pad=2) >>> deconv = TransposedConv3DLayer(conv, conv.input_shape[1], ... conv.filter_size, stride=conv.stride, crop=conv.pad, ... W=conv.W, flip_filters=not conv.flip_filters) References ---------- .. [1] Vincent Dumoulin, Francesco Visin (2016): A guide to convolution arithmetic for deep learning. arXiv. http://arxiv.org/abs/1603.07285, https://github.com/vdumoulin/conv_arithmetic """ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1, 1), crop=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=False, output_size=None, **kwargs): # output_size must be set before calling the super constructor if (not isinstance(output_size, T.Variable) and output_size is not None): output_size = as_tuple(output_size, 3, int_types) self.output_size = output_size BaseConvLayer.__init__(self, incoming, num_filters, filter_size, stride, crop, untie_biases, W, b, nonlinearity, flip_filters, n=3, **kwargs) # rename self.pad to self.crop: self.crop = self.pad del self.pad def get_W_shape(self): num_input_channels = self.input_shape[1] # first two sizes are swapped compared to a forward convolution return (num_input_channels, self.num_filters) + self.filter_size def get_output_shape_for(self, input_shape): if self.output_size is not None: size = self.output_size if isinstance(self.output_size, T.Variable): size = (None, None, None) return input_shape[0], self.num_filters, size[0], size[1], size[2] # If self.output_size is not specified, return the smallest shape # when called from the constructor, self.crop is still called self.pad: crop = getattr(self, 'crop', getattr(self, 'pad', None)) crop = crop if isinstance(crop, tuple) else (crop,) * self.n batchsize = input_shape[0] return ((batchsize, self.num_filters) + tuple(conv_input_length(input, filter, stride, p) for input, filter, stride, p in zip(input_shape[2:], self.filter_size, self.stride, crop))) def convolve(self, input, **kwargs): border_mode = 'half' if self.crop == 'same' else self.crop op = T.nnet.abstract_conv.AbstractConv3d_gradInputs( imshp=self.output_shape, kshp=self.get_W_shape(), subsample=self.stride, border_mode=border_mode, filter_flip=not self.flip_filters) output_size = self.output_shape[2:] if isinstance(self.output_size, T.Variable): output_size = self.output_size elif any(s is None for s in output_size): output_size = self.get_output_shape_for(input.shape)[2:] conved = op(self.W, input, output_size) return conved Deconv3DLayer = TransposedConv3DLayer if not hasattr(T.nnet.abstract_conv, 'AbstractConv3d_gradInputs'): # pragma: no cover # Hide TransposedConv3DLayer for old Theano versions del TransposedConv3DLayer, Deconv3DLayer __all__.remove('TransposedConv3DLayer') __all__.remove('Deconv3DLayer') class DilatedConv2DLayer(BaseConvLayer): """ lasagne.layers.DilatedConv2DLayer(incoming, num_filters, filter_size, dilation=(1, 1), pad=0, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False, **kwargs) 2D dilated convolution layer 
Performs a 2D convolution with dilated filters, then optionally adds a bias and applies an elementwise nonlinearity. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 4D tensor, with shape ``(batch_size, num_input_channels, input_rows, input_columns)``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 2-element tuple specifying the size of the filters. dilation : int or iterable of int An integer or a 2-element tuple specifying the dilation factor of the filters. A factor of :math:`x` corresponds to :math:`x - 1` zeros inserted between adjacent filter elements. pad : int, iterable of int, or 'valid' (default: 0) The amount of implicit zero padding of the input. This implementation does not support padding, the argument is provided for compatibility to other convolutional layers only. untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). If True, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a 3D tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a 4D tensor with shape ``(num_input_channels, num_filters, filter_rows, filter_columns)``. Note that the first two dimensions are swapped compared to a non-dilated convolution. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, output_rows, output_columns)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. flip_filters : bool (default: False) Whether to flip the filters before sliding them over the input, performing a convolution, or not to flip them and perform a correlation (this is the default). This implementation does not support flipped filters, the argument is provided for compatibility to other convolutional layers only. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. Notes ----- The dilated convolution is implemented as the backward pass of a convolution wrt. weights, passing the filters as the output gradient. It can be thought of as dilating the filters (by adding ``dilation - 1`` zeros between adjacent filter elements) and cross-correlating them with the input. See [1]_ for more background. References ---------- .. [1] Fisher Yu, Vladlen Koltun (2016), Multi-Scale Context Aggregation by Dilated Convolutions. ICLR 2016. 
http://arxiv.org/abs/1511.07122, https://github.com/fyu/dilation """ def __init__(self, incoming, num_filters, filter_size, dilation=(1, 1), pad=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=False, **kwargs): self.dilation = as_tuple(dilation, 2, int_types) super(DilatedConv2DLayer, self).__init__( incoming, num_filters, filter_size, 1, pad, untie_biases, W, b, nonlinearity, flip_filters, n=2, **kwargs) # remove self.stride: del self.stride # require valid convolution if self.pad != (0, 0): raise NotImplementedError( "DilatedConv2DLayer requires pad=0 / (0,0) / 'valid', but " "got %r. For a padded dilated convolution, add a PadLayer." % (pad,)) # require unflipped filters if self.flip_filters: raise NotImplementedError( "DilatedConv2DLayer requires flip_filters=False.") def get_W_shape(self): num_input_channels = self.input_shape[1] # first two sizes are swapped compared to a forward convolution return (num_input_channels, self.num_filters) + self.filter_size def get_output_shape_for(self, input_shape): batchsize = input_shape[0] return ((batchsize, self.num_filters) + tuple(conv_output_length(input, (filter-1) * dilate + 1, 1, 0) for input, filter, dilate in zip(input_shape[2:], self.filter_size, self.dilation))) def convolve(self, input, **kwargs): # we perform a convolution backward pass wrt weights, # passing kernels as output gradient imshp = self.input_shape kshp = self.output_shape # and swapping channels and batchsize imshp = (imshp[1], imshp[0]) + imshp[2:] kshp = (kshp[1], kshp[0]) + kshp[2:] op = T.nnet.abstract_conv.AbstractConv2d_gradWeights( imshp=imshp, kshp=kshp, subsample=self.dilation, border_mode='valid', filter_flip=False) output_size = self.output_shape[2:] if any(s is None for s in output_size): output_size = self.get_output_shape_for(input.shape)[2:] conved = op(input.transpose(1, 0, 2, 3), self.W, output_size) return conved.transpose(1, 0, 2, 3) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/dnn.py0000644000175000017500000011066613307306052025133 0ustar sinclairssinclairsimport warnings import theano from .. import init from .. import nonlinearities from .base import Layer from .conv import conv_output_length, BaseConvLayer from .pool import pool_output_length from .normalization import BatchNormLayer from ..utils import as_tuple # check if Theano's new GPU backend is available and in use try: from theano import gpuarray as gpu except ImportError: from theano.sandbox import gpuarray as gpu gpu_enabled = gpu.pygpu_activated dnn_enabled = gpu.dnn.dnn_present # if not, try to fall back to Theano's old GPU backend if not gpu_enabled: try: from theano.sandbox import cuda as gpu import theano.sandbox.cuda.dnn except Exception: # Theano 0.10+ raises nose.SkipTest gpu_enabled = False else: gpu_enabled = gpu.cuda_enabled dnn_enabled = gpu.dnn.dnn_available # if either of the backends is available, use it, otherwise bail out if gpu_enabled: if dnn_enabled(): dnn = gpu.dnn else: raise ImportError( "cuDNN not available: %s\nSee http://lasagne.readthedocs.org\ /en/latest/user/installation.html#cudnn\ " % dnn_enabled.msg) # pragma: no cover else: raise ImportError( "requires GPU support -- see http://lasagne.readthedocs.org/en/" "latest/user/installation.html#gpu-support") # pragma: no cover if theano.config.floatX == 'float64': warnings.warn("You are using a GPU layer with Theano configured for " "double precision (floatX=float64). 
Depending on your " "Theano version and GPU, this may be slow or unsupported. " "We recommend to configure Theano for single precision " "(floatX=float32); see http://lasagne.readthedocs.org/en/" "latest/user/installation.html#gpu-support.") __all__ = [ "Pool2DDNNLayer", "MaxPool2DDNNLayer", "Pool3DDNNLayer", "MaxPool3DDNNLayer", "Conv2DDNNLayer", "Conv3DDNNLayer", "SpatialPyramidPoolingDNNLayer", "BatchNormDNNLayer", "batch_norm_dnn", ] class Pool2DDNNLayer(Layer): """ 2D pooling layer Performs 2D mean- or max-pooling over the two trailing axes of a 4D input tensor. This is an alternative implementation which uses ``theano.sandbox.cuda.dnn.dnn_pool`` directly.
Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape.
pool_size : integer or iterable The length of the pooling region in each dimension. If an integer, it is promoted to a square pooling region. If an iterable, it should have two elements.
stride : integer, iterable or ``None`` The strides between successive pooling regions in each dimension. If ``None`` then ``stride = pool_size``.
pad : integer or iterable Number of elements to be added on each side of the input in each dimension. Each value must be less than the corresponding stride.
ignore_border : bool (default: True) This implementation never includes partial pooling regions, so this argument must always be set to True. It exists only to make sure the interface is compatible with :class:`lasagne.layers.MaxPool2DLayer`.
mode : string Pooling mode, one of 'max', 'average_inc_pad' or 'average_exc_pad'. Defaults to 'max'.
**kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass.
Notes ----- The value used to pad the input is chosen to be less than the minimum of the input, so that the output of each pooling region always corresponds to some element in the unpadded input region. This is a drop-in replacement for :class:`lasagne.layers.MaxPool2DLayer`. Its interface is the same, except it does not support ``ignore_border=False``. """ def __init__(self, incoming, pool_size, stride=None, pad=(0, 0), ignore_border=True, mode='max', **kwargs): super(Pool2DDNNLayer, self).__init__(incoming, **kwargs) if len(self.input_shape) != 4: raise ValueError("Tried to create a 2D pooling layer with " "input shape %r. Expected 4 input dimensions " "(batchsize, channels, 2 spatial dimensions)." % (self.input_shape,)) self.pool_size = as_tuple(pool_size, 2) if stride is None: self.stride = self.pool_size else: self.stride = as_tuple(stride, 2) self.pad = as_tuple(pad, 2) self.mode = mode # The ignore_border argument is for compatibility with MaxPool2DLayer. # ignore_border=False is not supported. Borders are always ignored.
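# With borders always ignored, each spatial output length works out to
# ceil((input_length + 2*pad - pool_size + 1) / stride); for example, a
# length-7 axis pooled with pool_size=2, stride=2, pad=0 yields 3 outputs,
# and the trailing element is simply dropped rather than pooled in a
# partial region.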
if not ignore_border: raise NotImplementedError("Pool2DDNNLayer does not support " "ignore_border=False.") def get_output_shape_for(self, input_shape): output_shape = list(input_shape) # copy / convert to mutable list output_shape[2] = pool_output_length(input_shape[2], pool_size=self.pool_size[0], stride=self.stride[0], pad=self.pad[0], ignore_border=True, ) output_shape[3] = pool_output_length(input_shape[3], pool_size=self.pool_size[1], stride=self.stride[1], pad=self.pad[1], ignore_border=True, ) return tuple(output_shape) def get_output_for(self, input, **kwargs): return dnn.dnn_pool(input, self.pool_size, self.stride, self.mode, self.pad) class MaxPool2DDNNLayer(Pool2DDNNLayer): """ 2D max-pooling layer Subclass of :class:`Pool2DDNNLayer` fixing ``mode='max'``, provided for compatibility with other ``MaxPool2DLayer`` classes. """ def __init__(self, incoming, pool_size, stride=None, pad=(0, 0), ignore_border=True, **kwargs): super(MaxPool2DDNNLayer, self).__init__(incoming, pool_size, stride, pad, ignore_border, mode='max', **kwargs) class Pool3DDNNLayer(Layer): """ 3D pooling layer Performs 3D mean- or max-pooling over the 3 trailing axes of a 5D input tensor. This is an alternative implementation which uses ``theano.sandbox.cuda.dnn.dnn_pool`` directly.
Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape.
pool_size : integer or iterable The length of the pooling region in each dimension. If an integer, it is promoted to a cubic pooling region. If an iterable, it should have three elements.
stride : integer, iterable or ``None`` The strides between successive pooling regions in each dimension. If ``None`` then ``stride = pool_size``.
pad : integer or iterable Number of elements to be added on each side of the input in each dimension. Each value must be less than the corresponding stride.
ignore_border : bool (default: True) This implementation never includes partial pooling regions, so this argument must always be set to True. It exists only to make sure the interface is compatible with :class:`lasagne.layers.MaxPool2DLayer`.
mode : string Pooling mode, one of 'max', 'average_inc_pad' or 'average_exc_pad'. Defaults to 'max'.
**kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass.
Notes ----- The value used to pad the input is chosen to be less than the minimum of the input, so that the output of each pooling region always corresponds to some element in the unpadded input region. """ def __init__(self, incoming, pool_size, stride=None, pad=(0, 0, 0), ignore_border=True, mode='max', **kwargs): super(Pool3DDNNLayer, self).__init__(incoming, **kwargs) if len(self.input_shape) != 5: raise ValueError("Tried to create a 3D pooling layer with " "input shape %r. Expected 5 input dimensions " "(batchsize, channels, 3 spatial dimensions)." % (self.input_shape,)) self.pool_size = as_tuple(pool_size, 3) if stride is None: self.stride = self.pool_size else: self.stride = as_tuple(stride, 3) self.pad = as_tuple(pad, 3) self.mode = mode # The ignore_border argument is for compatibility with MaxPool2DLayer. # ignore_border=False is not supported. Borders are always ignored.
if not ignore_border: raise NotImplementedError("Pool3DDNNLayer does not support " "ignore_border=False.") def get_output_shape_for(self, input_shape): output_shape = list(input_shape) # copy / convert to mutable list output_shape[2] = pool_output_length(input_shape[2], pool_size=self.pool_size[0], stride=self.stride[0], pad=self.pad[0], ignore_border=True, ) output_shape[3] = pool_output_length(input_shape[3], pool_size=self.pool_size[1], stride=self.stride[1], pad=self.pad[1], ignore_border=True, ) output_shape[4] = pool_output_length(input_shape[4], pool_size=self.pool_size[2], stride=self.stride[2], pad=self.pad[2], ignore_border=True, ) return tuple(output_shape) def get_output_for(self, input, **kwargs): return dnn.dnn_pool(input, self.pool_size, self.stride, self.mode, self.pad) class MaxPool3DDNNLayer(Pool3DDNNLayer): """ 3D max-pooling layer Subclass of :class:`Pool3DDNNLayer` fixing ``mode='max'``, provided for consistency to ``MaxPool2DLayer`` classes. """ def __init__(self, incoming, pool_size, stride=None, pad=(0, 0, 0), ignore_border=True, **kwargs): super(MaxPool3DDNNLayer, self).__init__(incoming, pool_size, stride, pad, ignore_border, mode='max', **kwargs) class Conv2DDNNLayer(BaseConvLayer): """ lasagne.layers.Conv2DDNNLayer(incoming, num_filters, filter_size, stride=(1, 1), pad=0, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False, **kwargs) 2D convolutional layer Performs a 2D convolution on its input and optionally adds a bias and applies an elementwise nonlinearity. This is an alternative implementation which uses ``theano.sandbox.cuda.dnn.dnn_conv`` directly. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 4D tensor, with shape ``(batch_size, num_input_channels, input_rows, input_columns)``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 2-element tuple specifying the size of the filters. stride : int or iterable of int An integer or a 2-element tuple specifying the stride of the convolution operation. pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0) By default, the convolution is only computed where the input and the filter fully overlap (a valid convolution). When ``stride=1``, this yields an output that is smaller than the input by ``filter_size - 1``. The `pad` argument allows you to implicitly pad the input with zeros, extending the output size. A single integer results in symmetric zero-padding of the given size on all borders, a tuple of two integers allows different symmetric padding per dimension. ``'full'`` pads with one less than the filter size on both sides. This is equivalent to computing the convolution wherever the input and the filter overlap by at least one position. ``'same'`` pads with half the filter size (rounded down) on both sides. When ``stride=1`` this results in an output size equal to the input size. Even filter size is not supported. ``'valid'`` is an alias for ``0`` (no padding / a valid convolution). Note that ``'full'`` and ``'same'`` can be faster than equivalent integer values due to optimizations by Theano. untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). 
If True, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a 3D tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a 4D tensor with shape ``(num_filters, num_input_channels, filter_rows, filter_columns)``. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, output_rows, output_columns)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. flip_filters : bool (default: False) Whether to flip the filters and perform a convolution, or not to flip them and perform a correlation. Flipping adds a bit of overhead, so it is disabled by default. In most cases this does not make a difference anyway because the filters are learnt. However, ``flip_filters`` should be set to ``True`` if weights are loaded into it that were learnt using a regular :class:`lasagne.layers.Conv2DLayer`, for example. num_groups : int (default: 1) The number of groups to split the input channels and output channels into, such that data does not cross the group boundaries. Requires the number of channels to be divisible by the number of groups, and requires Theano 0.10 or later for more than one group. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. """ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1), pad=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=False, num_groups=1, **kwargs): super(Conv2DDNNLayer, self).__init__(incoming, num_filters, filter_size, stride, pad, untie_biases, W, b, nonlinearity, flip_filters, num_groups, n=2, **kwargs) def convolve(self, input, **kwargs): # by default we assume 'cross', consistent with corrmm. conv_mode = 'conv' if self.flip_filters else 'cross' border_mode = self.pad if border_mode == 'same': border_mode = tuple(s // 2 for s in self.filter_size) extra_kwargs = {} if self.num_groups > 1: # pragma: no cover extra_kwargs = {'num_groups': self.num_groups} conved = dnn.dnn_conv(img=input, kerns=self.W, subsample=self.stride, border_mode=border_mode, conv_mode=conv_mode, **extra_kwargs) return conved class Conv3DDNNLayer(BaseConvLayer): """ lasagne.layers.Conv3DDNNLayer(incoming, num_filters, filter_size, stride=(1, 1, 1), pad=0, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False, **kwargs) 3D convolutional layer Performs a 3D convolution on its input and optionally adds a bias and applies an elementwise nonlinearity. This implementation uses ``theano.sandbox.cuda.dnn.dnn_conv3d`` directly. 
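As a minimal usage sketch (assuming a cuDNN-capable GPU is configured, since this module cannot be imported without one; the shapes below are purely illustrative):
>>> from lasagne.layers import InputLayer
>>> from lasagne.layers.dnn import Conv3DDNNLayer
>>> l_in = InputLayer((None, 1, 16, 64, 64))
>>> l_conv = Conv3DDNNLayer(l_in, num_filters=8, filter_size=3, pad='same')
>>> l_conv.output_shape
(None, 8, 16, 64, 64)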
Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 5D tensor, with shape ``(batch_size, num_input_channels, input_depth, input_rows, input_columns)``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 3-element tuple specifying the size of the filters. stride : int or iterable of int An integer or a 3-element tuple specifying the stride of the convolution operation. pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0) By default, the convolution is only computed where the input and the filter fully overlap (a valid convolution). When ``stride=1``, this yields an output that is smaller than the input by ``filter_size - 1``. The `pad` argument allows you to implicitly pad the input with zeros, extending the output size. A single integer results in symmetric zero-padding of the given size on all borders, a tuple of three integers allows different symmetric padding per dimension. ``'full'`` pads with one less than the filter size on both sides. This is equivalent to computing the convolution wherever the input and the filter overlap by at least one position. ``'same'`` pads with half the filter size (rounded down) on both sides. When ``stride=1`` this results in an output size equal to the input size. Even filter size is not supported. ``'valid'`` is an alias for ``0`` (no padding / a valid convolution). Note that ``'full'`` and ``'same'`` can be faster than equivalent integer values due to optimizations by Theano. untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). If True, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a 4D tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a 5D tensor with shape ``(num_filters, num_input_channels, filter_depth, filter_rows, filter_columns)``. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, output_depth, output_rows, output_columns)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. flip_filters : bool (default: False) Whether to flip the filters and perform a convolution, or not to flip them and perform a correlation. Flipping adds a bit of overhead, so it is disabled by default. In most cases this does not make a difference anyway because the filters are learned, but if you want to compute predictions with pre-trained weights, take care if they need flipping. num_groups : int (default: 1) The number of groups to split the input channels and output channels into, such that data does not cross the group boundaries. 
Requires the number of channels to be divisible by the number of groups, and requires Theano 0.10 or later for more than one group.
**kwargs Any additional keyword arguments are passed to the `Layer` superclass.
Attributes ---------- W : Theano shared variable or expression Variable or expression representing the filter weights. b : Theano shared variable or expression Variable or expression representing the biases. """ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1, 1), pad=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=False, num_groups=1, **kwargs): super(Conv3DDNNLayer, self).__init__(incoming, num_filters, filter_size, stride, pad, untie_biases, W, b, nonlinearity, flip_filters, num_groups, n=3, **kwargs) def convolve(self, input, **kwargs): # by default we assume 'cross', consistent with corrmm. conv_mode = 'conv' if self.flip_filters else 'cross' border_mode = self.pad if border_mode == 'same': border_mode = tuple(s // 2 for s in self.filter_size) extra_kwargs = {} if self.num_groups > 1: extra_kwargs = {'num_groups': self.num_groups} conved = dnn.dnn_conv3d(img=input, kerns=self.W, subsample=self.stride, border_mode=border_mode, conv_mode=conv_mode, **extra_kwargs) return conved class SpatialPyramidPoolingDNNLayer(Layer): """ Spatial Pyramid Pooling Layer Performs spatial pyramid pooling (SPP) over the input. It will turn a 2D input of arbitrary size into an output of fixed dimension. Hence, the convolutional part of a DNN can be connected to a dense part with a fixed number of nodes even if the dimensions of the input image are unknown. The pooling is performed over :math:`l` pooling levels. Each pooling level :math:`i` will create :math:`M_i` output features. :math:`M_i` is given by :math:`n_i * n_i`, with :math:`n_i` as the number of pooling operations per dimension in level :math:`i`, and we use a list of the :math:`n_i`'s as a parameter for the SPP layer. The length of this list is the level of the spatial pyramid.
Parameters ---------- incoming : a :class:`Layer` instance or tuple The layer feeding into this layer, or the expected input shape.
pool_dims : list of integers The list of :math:`n_i`'s that define the output dimension of each pooling level :math:`i`. The length of pool_dims is the level of the spatial pyramid.
mode : string Pooling mode, one of 'max', 'average_inc_pad' or 'average_exc_pad'. Defaults to 'max'.
**kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass.
Notes ----- This layer should be inserted between the convolutional part of a DNN and its dense part. Convolutions can be used for arbitrary input dimensions, but the size of their output will depend on their input dimensions. Connecting the output of the convolutional part to the dense part then usually requires fixing the dimensions of the network's InputLayer. The spatial pyramid pooling layer, however, allows us to leave the network input dimensions arbitrary. The advantage over a global pooling layer is the added robustness against object deformations due to the pooling on different scales.
References ---------- .. [1] He, Kaiming et al (2015): Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. http://arxiv.org/pdf/1406.4729.pdf.
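For example, with the default ``pool_dims=[4, 2, 1]`` each channel is pooled into a 4x4, a 2x2 and a 1x1 grid, giving 16 + 4 + 1 = 21 features per channel; the spatial input size only determines the pooling windows, not the output shape. A minimal sketch (assuming a cuDNN-enabled setup; the channel count is illustrative):
>>> from lasagne.layers.dnn import SpatialPyramidPoolingDNNLayer
>>> l_spp = SpatialPyramidPoolingDNNLayer((None, 64, None, None))
>>> l_spp.output_shape
(None, 64, 21)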
""" def __init__(self, incoming, pool_dims=[4, 2, 1], mode='max', **kwargs): super(SpatialPyramidPoolingDNNLayer, self).__init__(incoming, **kwargs) if len(self.input_shape) != 4: raise ValueError("Tried to create a SPP layer with " "input shape %r. Expected 4 input dimensions " "(batchsize, channels, 2 spatial dimensions)." % (self.input_shape,)) self.mode = mode self.pool_dims = pool_dims def get_output_for(self, input, **kwargs): input_size = tuple(symb if fixed is None else fixed for fixed, symb in zip(self.input_shape[2:], input.shape[2:])) pool_list = [] for pool_dim in self.pool_dims: win_size = tuple((i + pool_dim - 1) // pool_dim for i in input_size) str_size = tuple(i // pool_dim for i in input_size) pool = dnn.dnn_pool(input, win_size, str_size, self.mode, (0, 0)) pool = pool.flatten(3) pool_list.append(pool) return theano.tensor.concatenate(pool_list, axis=2) def get_output_shape_for(self, input_shape): num_features = sum(p*p for p in self.pool_dims) return (input_shape[0], input_shape[1], num_features) class BatchNormDNNLayer(BatchNormLayer): """ lasagne.layers.BatchNormDNNLayer(incoming, axes='auto', epsilon=1e-4, alpha=0.1, beta=lasagne.init.Constant(0), gamma=lasagne.init.Constant(1), mean=lasagne.init.Constant(0), inv_std=lasagne.init.Constant(1), **kwargs) Batch Normalization This layer implements batch normalization of its inputs: .. math:: y = \\frac{x - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}} \\gamma + \\beta This is a drop-in replacement for :class:`lasagne.layers.BatchNormLayer` that uses cuDNN for improved performance and reduced memory usage. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape axes : 'auto', int or tuple of int The axis or axes to normalize over. If ``'auto'`` (the default), normalize over all axes except for the second: this will normalize over the minibatch dimension for dense layers, and additionally over all spatial dimensions for convolutional layers. Only supports ``'auto'`` and the equivalent axes list, or ``0`` and ``(0,)`` to normalize over the minibatch dimension only. epsilon : scalar Small constant :math:`\\epsilon` added to the variance before taking the square root and dividing by it, to avoid numerical problems. Must not be smaller than ``1e-5``. alpha : scalar Coefficient for the exponential moving average of batch-wise means and standard deviations computed during training; the closer to one, the more it will depend on the last batches seen beta : Theano shared variable, expression, numpy array, callable or None Initial value, expression or initializer for :math:`\\beta`. Must match the incoming shape, skipping all axes in `axes`. Set to ``None`` to fix it to 0.0 instead of learning it. See :func:`lasagne.utils.create_param` for more information. gamma : Theano shared variable, expression, numpy array, callable or None Initial value, expression or initializer for :math:`\\gamma`. Must match the incoming shape, skipping all axes in `axes`. Set to ``None`` to fix it to 1.0 instead of learning it. See :func:`lasagne.utils.create_param` for more information. mean : Theano shared variable, expression, numpy array, or callable Initial value, expression or initializer for :math:`\\mu`. Must match the incoming shape, skipping all axes in `axes`. See :func:`lasagne.utils.create_param` for more information. inv_std : Theano shared variable, expression, numpy array, or callable Initial value, expression or initializer for :math:`1 / \\sqrt{ \\sigma^2 + \\epsilon}`. 
Must match the incoming shape, skipping all axes in `axes`. See :func:`lasagne.utils.create_param` for more information. **kwargs Any additional keyword arguments are passed to the :class:`Layer` superclass. Notes ----- This layer should be inserted between a linear transformation (such as a :class:`DenseLayer`, or :class:`Conv2DLayer`) and its nonlinearity. The convenience function :func:`batch_norm_dnn` modifies an existing layer to insert cuDNN batch normalization in front of its nonlinearity. For further information, see :class:`lasagne.layers.BatchNormLayer`. This implementation is fully compatible, except for restrictions on the `axes` and `epsilon` arguments. See also -------- batch_norm_dnn : Convenience function to apply batch normalization """ def __init__(self, incoming, axes='auto', epsilon=1e-4, alpha=0.1, beta=init.Constant(0), gamma=init.Constant(1), mean=init.Constant(0), inv_std=init.Constant(1), **kwargs): super(BatchNormDNNLayer, self).__init__( incoming, axes, epsilon, alpha, beta, gamma, mean, inv_std, **kwargs) all_but_second_axis = (0,) + tuple(range(2, len(self.input_shape))) if self.axes not in ((0,), all_but_second_axis): raise ValueError("BatchNormDNNLayer only supports normalization " "across the first axis, or across all but the " "second axis, got axes=%r" % (axes,)) def get_output_for(self, input, deterministic=False, batch_norm_use_averages=None, batch_norm_update_averages=None, **kwargs): # Decide whether to use the stored averages or mini-batch statistics if batch_norm_use_averages is None: batch_norm_use_averages = deterministic use_averages = batch_norm_use_averages # Decide whether to update the stored averages if batch_norm_update_averages is None: batch_norm_update_averages = not deterministic update_averages = batch_norm_update_averages # prepare dimshuffle pattern inserting broadcastable axes as needed param_axes = iter(range(input.ndim - len(self.axes))) pattern = ['x' if input_axis in self.axes else next(param_axes) for input_axis in range(input.ndim)] # and prepare the converse pattern removing those broadcastable axes unpattern = [d for d in range(input.ndim) if d not in self.axes] # call cuDNN if needed, obtaining normalized outputs and statistics if not use_averages or update_averages: # cuDNN requires beta/gamma tensors; create them if needed shape = tuple(s for (d, s) in enumerate(input.shape) if d not in self.axes) gamma = self.gamma or theano.tensor.ones(shape) beta = self.beta or theano.tensor.zeros(shape) mode = 'per-activation' if self.axes == (0,) else 'spatial' (normalized, input_mean, input_inv_std) = dnn.dnn_batch_normalization_train( input, gamma.dimshuffle(pattern), beta.dimshuffle(pattern), mode, self.epsilon) # normalize with stored averages, if needed if use_averages: mean = self.mean.dimshuffle(pattern) inv_std = self.inv_std.dimshuffle(pattern) gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern) beta = 0 if self.beta is None else self.beta.dimshuffle(pattern) normalized = (input - mean) * (gamma * inv_std) + beta # update stored averages, if needed if update_averages: # Trick: To update the stored statistics, we create memory-aliased # clones of the stored statistics: running_mean = theano.clone(self.mean, share_inputs=False) running_inv_std = theano.clone(self.inv_std, share_inputs=False) # set a default update for them: running_mean.default_update = ((1 - self.alpha) * running_mean + self.alpha * input_mean.dimshuffle(unpattern)) running_inv_std.default_update = ((1 - self.alpha) * running_inv_std + 
self.alpha * input_inv_std.dimshuffle(unpattern)) # and make sure they end up in the graph without participating in # the computation (this way their default_update will be collected # and applied, but the computation will be optimized away): dummy = 0 * (running_mean + running_inv_std).dimshuffle(pattern) normalized = normalized + dummy return normalized def batch_norm_dnn(layer, **kwargs): """ Apply cuDNN batch normalization to an existing layer. This is a drop-in replacement for :func:`lasagne.layers.batch_norm`; see there for further information. Parameters ---------- layer : A :class:`Layer` instance The layer to apply the normalization to; note that it will be modified as specified in :func:`lasagne.layers.batch_norm` **kwargs Any additional keyword arguments are passed on to the :class:`BatchNormDNNLayer` constructor. Returns ------- BatchNormDNNLayer or NonlinearityLayer instance A batch normalization layer stacked on the given modified `layer`, or a nonlinearity layer stacked on top of both if `layer` was nonlinear. """ nonlinearity = getattr(layer, 'nonlinearity', None) if nonlinearity is not None: layer.nonlinearity = nonlinearities.identity if hasattr(layer, 'b') and layer.b is not None: del layer.params[layer.b] layer.b = None bn_name = (kwargs.pop('name', None) or (getattr(layer, 'name', None) and layer.name + '_bn')) layer = BatchNormDNNLayer(layer, name=bn_name, **kwargs) if nonlinearity is not None: from .special import NonlinearityLayer nonlin_name = bn_name and bn_name + '_nonlin' layer = NonlinearityLayer(layer, nonlinearity, name=nonlin_name) return layer if not hasattr(dnn, 'dnn_batch_normalization_train'): # Hide cuDNN-based batch normalization for old Theano versions del BatchNormDNNLayer, batch_norm_dnn __all__.remove('BatchNormDNNLayer') __all__.remove('batch_norm_dnn') Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/corrmm.py0000644000175000017500000001706113307306052025646 0ustar sinclairssinclairsimport warnings import theano from .. import init from .. import nonlinearities from .base import Layer from .conv import conv_output_length, BaseConvLayer from ..utils import as_tuple # check if Theano's new GPU backend is available and in use try: from theano import gpuarray as gpu except ImportError: from theano.sandbox import gpuarray as gpu gpu_enabled = gpu.pygpu_activated # if not, try to fall back to Theano's old GPU backend if not gpu_enabled: try: from theano.sandbox import cuda as gpu except Exception: # Theano 0.10+ raises nose.SkipTest gpu_enabled = False else: gpu_enabled = gpu.cuda_enabled # if either of the backends is available, use it, otherwise bail out if gpu_enabled: gpu_contiguous = gpu.basic_ops.gpu_contiguous GpuCorrMM = gpu.blas.GpuCorrMM else: raise ImportError( "requires GPU support -- see http://lasagne.readthedocs.org/en/" "latest/user/installation.html#gpu-support") # pragma: no cover if theano.config.floatX == 'float64': warnings.warn("You are using a GPU layer with Theano configured for " "double precision (floatX=float64). Depending on your " "Theano version and GPU, this may be slow or unsupported. 
" "We recommend to configure Theano for single precision " "(floatX=float32); see http://lasagne.readthedocs.org/en/" "latest/user/installation.html#gpu-support.") __all__ = [ "Conv2DMMLayer", ] class Conv2DMMLayer(BaseConvLayer): """ lasagne.layers.Conv2DMMLayer(incoming, num_filters, filter_size, stride=(1, 1), pad=0, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False, **kwargs) 2D convolutional layer Performs a 2D convolution on its input and optionally adds a bias and applies an elementwise nonlinearity. This is an alternative implementation which uses ``theano.sandbox.cuda.blas.GpuCorrMM`` directly. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape. The output of this layer should be a 4D tensor, with shape ``(batch_size, num_input_channels, input_rows, input_columns)``. num_filters : int The number of learnable convolutional filters this layer has. filter_size : int or iterable of int An integer or a 2-element tuple specifying the size of the filters. stride : int or iterable of int An integer or a 2-element tuple specifying the stride of the convolution operation. pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0) By default, the convolution is only computed where the input and the filter fully overlap (a valid convolution). When ``stride=1``, this yields an output that is smaller than the input by ``filter_size - 1``. The `pad` argument allows you to implicitly pad the input with zeros, extending the output size. A single integer results in symmetric zero-padding of the given size on all borders, a tuple of two integers allows different symmetric padding per dimension. ``'full'`` pads with one less than the filter size on both sides. This is equivalent to computing the convolution wherever the input and the filter overlap by at least one position. ``'same'`` pads with half the filter size (rounded down) on both sides. When ``stride=1`` this results in an output size equal to the input size. Even filter size is not supported. ``'valid'`` is an alias for ``0`` (no padding / a valid convolution). Note that ``'full'`` and ``'same'`` can be faster than equivalent integer values due to optimizations by Theano. untie_biases : bool (default: False) If ``False``, the layer will have a bias parameter for each channel, which is shared across all positions in this channel. As a result, the `b` attribute will be a vector (1D). If True, the layer will have separate bias parameters for each position in each channel. As a result, the `b` attribute will be a 3D tensor. W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a 4D tensor with shape ``(num_filters, num_input_channels, filter_rows, filter_columns)``. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to ``False``. If it is set to ``True``, its shape should be ``(num_filters, output_rows, output_columns)`` instead. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. 
If None is provided, the layer will be linear. flip_filters : bool (default: False) Whether to flip the filters and perform a convolution, or not to flip them and perform a correlation. Flipping adds a bit of overhead, so it is disabled by default. In most cases this does not make a difference anyway because the filters are learnt. However, ``flip_filters`` should be set to ``True`` if weights are loaded into it that were learnt using a regular :class:`lasagne.layers.Conv2DLayer`, for example. num_groups : int (default: 1) The number of groups to split the input channels and output channels into, such that data does not cross the group boundaries. Requires the number of channels to be divisible by the number of groups, and requires Theano 0.10 or later for more than one group. **kwargs Any additional keyword arguments are passed to the `Layer` superclass. Attributes ---------- W : Theano shared variable Variable representing the filter weights. b : Theano shared variable Variable representing the biases. """ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1), pad=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, flip_filters=False, num_groups=1, **kwargs): super(Conv2DMMLayer, self).__init__(incoming, num_filters, filter_size, stride, pad, untie_biases, W, b, nonlinearity, flip_filters, num_groups, n=2, **kwargs) border_mode = 'half' if self.pad == 'same' else self.pad extra_kwargs = {'num_groups': num_groups} if num_groups > 1 else {} self.corr_mm_op = GpuCorrMM(subsample=self.stride, border_mode=border_mode, **extra_kwargs) def convolve(self, input, **kwargs): filters = self.W if self.flip_filters: filters = filters[:, :, ::-1, ::-1] # flip top-down, left-right contiguous_filters = gpu_contiguous(filters) contiguous_input = gpu_contiguous(input) conved = self.corr_mm_op(contiguous_input, contiguous_filters) return conved Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/layers/dense.py0000644000175000017500000002161013307306052025440 0ustar sinclairssinclairsimport numpy as np import theano.tensor as T from .. import init from .. import nonlinearities from .base import Layer __all__ = [ "DenseLayer", "NINLayer", ] class DenseLayer(Layer): """ lasagne.layers.DenseLayer(incoming, num_units, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, num_leading_axes=1, **kwargs) A fully connected layer. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape num_units : int The number of units of the layer W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a matrix with shape ``(num_inputs, num_units)``. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_units,)``. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. num_leading_axes : int Number of leading axes to distribute the dot product over. These axes will be kept in the output tensor, remaining axes will be collapsed and multiplied against the weight matrix. 
A negative number gives the (negated) number of trailing axes to involve in the dot product. Examples -------- >>> from lasagne.layers import InputLayer, DenseLayer >>> l_in = InputLayer((100, 20)) >>> l1 = DenseLayer(l_in, num_units=50) If the input has more than two axes, by default, all trailing axes will be flattened. This is useful when a dense layer follows a convolutional layer. >>> l_in = InputLayer((None, 10, 20, 30)) >>> DenseLayer(l_in, num_units=50).output_shape (None, 50) Using the `num_leading_axes` argument, you can specify to keep more than just the first axis. E.g., to apply the same dot product to each step of a batch of time sequences, you would want to keep the first two axes. >>> DenseLayer(l_in, num_units=50, num_leading_axes=2).output_shape (None, 10, 50) >>> DenseLayer(l_in, num_units=50, num_leading_axes=-1).output_shape (None, 10, 20, 50) """ def __init__(self, incoming, num_units, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, num_leading_axes=1, **kwargs): super(DenseLayer, self).__init__(incoming, **kwargs) self.nonlinearity = (nonlinearities.identity if nonlinearity is None else nonlinearity) self.num_units = num_units if num_leading_axes >= len(self.input_shape): raise ValueError( "Got num_leading_axes=%d for a %d-dimensional input, " "leaving no trailing axes for the dot product." % (num_leading_axes, len(self.input_shape))) elif num_leading_axes < -len(self.input_shape): raise ValueError( "Got num_leading_axes=%d for a %d-dimensional input, " "requesting more trailing axes than there are input " "dimensions." % (num_leading_axes, len(self.input_shape))) self.num_leading_axes = num_leading_axes if any(s is None for s in self.input_shape[num_leading_axes:]): raise ValueError( "A DenseLayer requires a fixed input shape (except for " "the leading axes). Got %r for num_leading_axes=%d." % (self.input_shape, self.num_leading_axes)) num_inputs = int(np.prod(self.input_shape[num_leading_axes:])) self.W = self.add_param(W, (num_inputs, num_units), name="W") if b is None: self.b = None else: self.b = self.add_param(b, (num_units,), name="b", regularizable=False) def get_output_shape_for(self, input_shape): return input_shape[:self.num_leading_axes] + (self.num_units,) def get_output_for(self, input, **kwargs): num_leading_axes = self.num_leading_axes if num_leading_axes < 0: num_leading_axes += input.ndim if input.ndim > num_leading_axes + 1: # flatten trailing axes (into (n+1)-tensor for num_leading_axes=n) input = input.flatten(num_leading_axes + 1) activation = T.dot(input, self.W) if self.b is not None: activation = activation + self.b return self.nonlinearity(activation) class NINLayer(Layer): """ lasagne.layers.NINLayer(incoming, num_units, untie_biases=False, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs) Network-in-network layer. Like DenseLayer, but broadcasting across all trailing dimensions beyond the 2nd. This results in a convolution operation with filter size 1 on all trailing dimensions. Any number of trailing dimensions is supported, so NINLayer can be used to implement 1D, 2D, 3D, ... convolutions. Parameters ---------- incoming : a :class:`Layer` instance or a tuple The layer feeding into this layer, or the expected input shape num_units : int The number of units of the layer untie_biases : bool If false the network has a single bias vector similar to a dense layer. If true a separate bias vector is used for each trailing dimension beyond the 2nd. 
W : Theano shared variable, expression, numpy array or callable Initial value, expression or initializer for the weights. These should be a matrix with shape ``(num_inputs, num_units)``, where ``num_inputs`` is the size of the second dimension of the input. See :func:`lasagne.utils.create_param` for more information. b : Theano shared variable, expression, numpy array, callable or ``None`` Initial value, expression or initializer for the biases. If set to ``None``, the layer will have no biases. Otherwise, biases should be a 1D array with shape ``(num_units,)`` for ``untie_biases=False``, and a tensor of shape ``(num_units, input_shape[2], ..., input_shape[-1])`` for ``untie_biases=True``. See :func:`lasagne.utils.create_param` for more information. nonlinearity : callable or None The nonlinearity that is applied to the layer activations. If None is provided, the layer will be linear. Examples -------- >>> from lasagne.layers import InputLayer, NINLayer >>> l_in = InputLayer((100, 20, 10, 3)) >>> l1 = NINLayer(l_in, num_units=5) References ---------- .. [1] Lin, Min, Qiang Chen, and Shuicheng Yan (2013): Network in network. arXiv preprint arXiv:1312.4400. """ def __init__(self, incoming, num_units, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, **kwargs): super(NINLayer, self).__init__(incoming, **kwargs) self.nonlinearity = (nonlinearities.identity if nonlinearity is None else nonlinearity) self.num_units = num_units self.untie_biases = untie_biases num_input_channels = self.input_shape[1] self.W = self.add_param(W, (num_input_channels, num_units), name="W") if b is None: self.b = None else: if self.untie_biases: biases_shape = (num_units,) + self.output_shape[2:] else: biases_shape = (num_units,) self.b = self.add_param(b, biases_shape, name="b", regularizable=False) def get_output_shape_for(self, input_shape): return (input_shape[0], self.num_units) + input_shape[2:] def get_output_for(self, input, **kwargs): # cf * bc01... = fb01... out_r = T.tensordot(self.W, input, axes=[[0], [1]]) # input dims to broadcast over remaining_dims = range(2, input.ndim) # bf01... out = out_r.dimshuffle(1, 0, *remaining_dims) if self.b is None: activation = out else: if self.untie_biases: # no broadcast remaining_dims_biases = range(1, input.ndim - 1) else: remaining_dims_biases = ['x'] * (input.ndim - 2) # broadcast b_shuffled = self.b.dimshuffle('x', 0, *remaining_dims_biases) activation = out + b_shuffled return self.nonlinearity(activation) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/lasagne/objectives.py0000644000175000017500000004142113307306052025202 0ustar sinclairssinclairs""" Provides some minimal help with building loss expressions for training or validating a neural network. Six functions build element- or item-wise loss expressions from network predictions and targets: .. autosummary:: :nosignatures: binary_crossentropy categorical_crossentropy squared_error binary_hinge_loss multiclass_hinge_loss huber_loss A convenience function aggregates such losses into a scalar expression suitable for differentiation: .. autosummary:: :nosignatures: aggregate Note that these functions only serve to write more readable code, but are completely optional. Essentially, any differentiable scalar Theano expression can be used as a training objective. Finally, two functions compute evaluation measures that are useful for validation and testing only, not for training: .. 
autosummary:: :nosignatures: binary_accuracy categorical_accuracy Those can also be aggregated into a scalar expression if needed. Examples -------- Assuming you have a simple neural network for 3-way classification: >>> from lasagne.layers import InputLayer, DenseLayer, get_output >>> from lasagne.nonlinearities import softmax, rectify >>> l_in = InputLayer((100, 20)) >>> l_hid = DenseLayer(l_in, num_units=30, nonlinearity=rectify) >>> l_out = DenseLayer(l_hid, num_units=3, nonlinearity=softmax) And Theano variables representing your network input and targets: >>> import theano >>> data = theano.tensor.matrix('data') >>> targets = theano.tensor.matrix('targets') You'd first construct an element-wise loss expression: >>> from lasagne.objectives import categorical_crossentropy, aggregate >>> predictions = get_output(l_out, data) >>> loss = categorical_crossentropy(predictions, targets) Then aggregate it into a scalar (you could also just call ``mean()`` on it): >>> loss = aggregate(loss, mode='mean') Finally, this gives a loss expression you can pass to any of the update methods in :mod:`lasagne.updates`. For validation of a network, you will usually want to repeat these steps with deterministic network output, i.e., without dropout or any other nondeterministic computation in between: >>> test_predictions = get_output(l_out, data, deterministic=True) >>> test_loss = categorical_crossentropy(test_predictions, targets) >>> test_loss = aggregate(test_loss) This gives a loss expression good for monitoring validation error. """ import theano.tensor from .utils import as_theano_expression __all__ = [ "binary_crossentropy", "categorical_crossentropy", "squared_error", "aggregate", "binary_hinge_loss", "multiclass_hinge_loss", "huber_loss", "binary_accuracy", "categorical_accuracy" ] def align_targets(predictions, targets): """Helper function turning a target 1D vector into a column if needed. This way, combining a network of a single output unit with a target vector works as expected by most users, not broadcasting outputs against targets. Parameters ---------- predictions : Theano tensor Expression for the predictions of a neural network. targets : Theano tensor Expression or variable for corresponding targets. Returns ------- predictions : Theano tensor The predictions unchanged. targets : Theano tensor If `predictions` is a column vector and `targets` is a 1D vector, returns `targets` turned into a column vector. Otherwise, returns `targets` unchanged. """ if (getattr(predictions, 'broadcastable', None) == (False, True) and getattr(targets, 'ndim', None) == 1): targets = as_theano_expression(targets).dimshuffle(0, 'x') return predictions, targets def binary_crossentropy(predictions, targets): """Computes the binary cross-entropy between predictions and targets. .. math:: L = -t \\log(p) - (1 - t) \\log(1 - p) Parameters ---------- predictions : Theano tensor Predictions in (0, 1), such as sigmoidal output of a neural network. targets : Theano tensor Targets in [0, 1], such as ground truth labels. Returns ------- Theano tensor An expression for the element-wise binary cross-entropy. Notes ----- This is the loss function of choice for binary classification problems and sigmoid output units. """ predictions, targets = align_targets(predictions, targets) return theano.tensor.nnet.binary_crossentropy(predictions, targets) def categorical_crossentropy(predictions, targets): """Computes the categorical cross-entropy between predictions and targets. .. 
math:: L_i = - \\sum_j{t_{i,j} \\log(p_{i,j})} :math:`p` are the predictions, :math:`t` are the targets, :math:`i` denotes the data point and :math:`j` denotes the class. Parameters ---------- predictions : Theano 2D tensor Predictions in (0, 1), such as softmax output of a neural network, with data points in rows and class probabilities in columns. targets : Theano 2D tensor or 1D tensor Either targets in [0, 1] matching the layout of `predictions`, or a vector of int giving the correct class index per data point. In the case of an integer vector argument, each element represents the position of the '1' in a one-hot encoding. Returns ------- Theano 1D tensor An expression for the item-wise categorical cross-entropy. Notes ----- This is the loss function of choice for multi-class classification problems and softmax output units. For hard targets, i.e., targets that assign all of the probability to a single class per data point, providing a vector of int for the targets is usually slightly more efficient than providing a matrix with a single 1.0 per row. """ return theano.tensor.nnet.categorical_crossentropy(predictions, targets) def squared_error(a, b): """Computes the element-wise squared difference between two tensors. .. math:: L = (p - t)^2 Parameters ---------- a, b : Theano tensor The tensors to compute the squared difference between. Returns ------- Theano tensor An expression for the element-wise squared difference. Notes ----- This is the loss function of choice for many regression problems or auto-encoders with linear output units. """ a, b = align_targets(a, b) return theano.tensor.square(a - b) def aggregate(loss, weights=None, mode='mean'): """Aggregates an element- or item-wise loss to a scalar loss. Parameters ---------- loss : Theano tensor The loss expression to aggregate. weights : Theano tensor, optional The weights for each element or item, must be broadcastable to the same shape as `loss` if given. If omitted, all elements will be weighted the same. mode : {'mean', 'sum', 'normalized_sum'} Whether to aggregate by averaging, by summing or by summing and dividing by the total weights (which requires `weights` to be given). Returns ------- Theano scalar A scalar loss expression suitable for differentiation. Notes ----- By supplying binary weights (i.e., only using values 0 and 1), this function can also be used for masking out particular entries in the loss expression. Note that masked entries still need to be valid values, not-a-numbers (NaNs) will propagate through. When applied to batch-wise loss expressions, setting `mode` to ``'normalized_sum'`` ensures that the loss per batch is of a similar magnitude, independent of associated weights. However, it means that a given data point contributes more to the loss when it shares a batch with low-weighted or masked data points than with high-weighted ones. """ if weights is not None: loss = loss * weights if mode == 'mean': return loss.mean() elif mode == 'sum': return loss.sum() elif mode == 'normalized_sum': if weights is None: raise ValueError("require weights for mode='normalized_sum'") return loss.sum() / weights.sum() else: raise ValueError("mode must be 'mean', 'sum' or 'normalized_sum', " "got %r" % mode) def binary_hinge_loss(predictions, targets, delta=1, log_odds=None, binary=True): """Computes the binary hinge loss between predictions and targets. .. 
math:: L_i = \\max(0, \\delta - t_i p_i) Parameters ---------- predictions : Theano tensor Predictions in (0, 1), such as sigmoidal output of a neural network (or log-odds of predictions depending on `log_odds`). targets : Theano tensor Targets in {0, 1} (or in {-1, 1} depending on `binary`), such as ground truth labels. delta : scalar, default 1 The hinge loss margin log_odds : bool, default None ``False`` if predictions are sigmoid outputs in (0, 1), ``True`` if predictions are sigmoid inputs, or log-odds. If ``None``, will assume ``True``, but warn that the default will change to ``False``. binary : bool, default True ``True`` if targets are in {0, 1}, ``False`` if they are in {-1, 1} Returns ------- Theano tensor An expression for the element-wise binary hinge loss Notes ----- This is an alternative to the binary cross-entropy loss for binary classification problems. Note that it is a drop-in replacement only when giving ``log_odds=False``. Otherwise, it requires log-odds rather than sigmoid outputs. Be aware that depending on the Theano version, ``log_odds=False`` with a sigmoid output layer may be less stable than ``log_odds=True`` with a linear layer. """ if log_odds is None: # pragma: no cover raise FutureWarning( "The `log_odds` argument to `binary_hinge_loss` will change " "its default to `False` in a future version. Explicitly give " "`log_odds=True` to retain current behavior in your code, " "but also check the documentation if this is what you want.") log_odds = True if not log_odds: predictions = theano.tensor.log(predictions / (1 - predictions)) if binary: targets = 2 * targets - 1 predictions, targets = align_targets(predictions, targets) return theano.tensor.nnet.relu(delta - predictions * targets) def multiclass_hinge_loss(predictions, targets, delta=1): """Computes the multi-class hinge loss between predictions and targets. .. math:: L_i = \\max_{j \\not = t_i} (0, p_j - p_{t_i} + \\delta) Parameters ---------- predictions : Theano 2D tensor Predictions in (0, 1), such as softmax output of a neural network, with data points in rows and class probabilities in columns. targets : Theano 2D tensor or 1D tensor Either a vector of int giving the correct class index per data point or a 2D tensor of one-hot encoding of the correct class in the same layout as predictions (non-binary targets in [0, 1] do not work!) delta : scalar, default 1 The hinge loss margin Returns ------- Theano 1D tensor An expression for the item-wise multi-class hinge loss Notes ----- This is an alternative to the categorical cross-entropy loss for multi-class classification problems """ num_cls = predictions.shape[1] if targets.ndim == predictions.ndim - 1: targets = theano.tensor.extra_ops.to_one_hot(targets, num_cls) elif targets.ndim != predictions.ndim: raise TypeError('rank mismatch between targets and predictions') corrects = predictions[targets.nonzero()] rest = theano.tensor.reshape(predictions[(1-targets).nonzero()], (-1, num_cls-1)) rest = theano.tensor.max(rest, axis=1) return theano.tensor.nnet.relu(rest - corrects + delta) def huber_loss(predictions, targets, delta=1): """ Computes the huber loss between predictions and targets. .. math:: L_i = \\frac{(p - t)^2}{2}, |p - t| \\le \\delta L_i = \\delta (|p - t| - \\frac{\\delta}{2} ), |p - t| \\gt \\delta Parameters ---------- predictions : Theano 2D tensor or 1D tensor Prediction outputs of a neural network. targets : Theano 2D tensor or 1D tensor Ground truth to which the prediction is to be compared with. Either a vector or 2D Tensor. 
delta : scalar, default 1 This delta value is defaulted to 1, for `SmoothL1Loss` described in Fast-RCNN paper [1]_ . Returns ------- Theano tensor An expression for the element-wise huber loss [2]_ . Notes ----- This is an alternative to the squared error for regression problems. References ---------- .. [1] Ross Girshick et al (2015): Fast RCNN https://arxiv.org/pdf/1504.08083.pdf .. [2] Huber, Peter et al (1964) Robust Estimation of a Location Parameter https://projecteuclid.org/euclid.aoms/1177703732 """ predictions, targets = align_targets(predictions, targets) abs_diff = abs(targets - predictions) ift = 0.5 * squared_error(targets, predictions) iff = delta * (abs_diff - delta / 2.) return theano.tensor.switch(abs_diff <= delta, ift, iff) def binary_accuracy(predictions, targets, threshold=0.5): """Computes the binary accuracy between predictions and targets. .. math:: L_i = \\mathbb{I}(t_i = \mathbb{I}(p_i \\ge \\alpha)) Parameters ---------- predictions : Theano tensor Predictions in [0, 1], such as a sigmoidal output of a neural network, giving the probability of the positive class targets : Theano tensor Targets in {0, 1}, such as ground truth labels. threshold : scalar, default: 0.5 Specifies at what threshold to consider the predictions being of the positive class Returns ------- Theano tensor An expression for the element-wise binary accuracy in {0, 1} Notes ----- This objective function should not be used with a gradient calculation; its gradient is zero everywhere. It is intended as a convenience for validation and testing, not training. To obtain the average accuracy, call :func:`theano.tensor.mean()` on the result, passing ``dtype=theano.config.floatX`` to compute the mean on GPU. """ predictions, targets = align_targets(predictions, targets) predictions = theano.tensor.ge(predictions, threshold) return theano.tensor.eq(predictions, targets) def categorical_accuracy(predictions, targets, top_k=1): """Computes the categorical accuracy between predictions and targets. .. math:: L_i = \\mathbb{I}(t_i = \\operatorname{argmax}_c p_{i,c}) Can be relaxed to allow matches among the top :math:`k` predictions: .. math:: L_i = \\mathbb{I}(t_i \\in \\operatorname{argsort}_c (-p_{i,c})_{:k}) Parameters ---------- predictions : Theano 2D tensor Predictions in (0, 1), such as softmax output of a neural network, with data points in rows and class probabilities in columns. targets : Theano 2D tensor or 1D tensor Either a vector of int giving the correct class index per data point or a 2D tensor of 1 hot encoding of the correct class in the same layout as predictions top_k : int Regard a prediction to be correct if the target class is among the `top_k` largest class probabilities. For the default value of 1, a prediction is correct only if the target class is the most probable. Returns ------- Theano 1D tensor An expression for the item-wise categorical accuracy in {0, 1} Notes ----- This is a strictly non differential function as it includes an argmax. This objective function should never be used with a gradient calculation. It is intended as a convenience for validation and testing not training. To obtain the average accuracy, call :func:`theano.tensor.mean()` on the result, passing ``dtype=theano.config.floatX`` to compute the mean on GPU. 
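    Examples
    --------
    A minimal sketch of building a symbolic top-2 accuracy expression;
    the variable names below are purely illustrative:

    >>> import theano.tensor as T
    >>> predictions = T.matrix('predictions')
    >>> targets = T.ivector('targets')
    >>> acc = categorical_accuracy(predictions, targets, top_k=2)
    >>> mean_acc = acc.mean()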
""" if targets.ndim == predictions.ndim: targets = theano.tensor.argmax(targets, axis=-1) elif targets.ndim != predictions.ndim - 1: raise TypeError('rank mismatch between targets and predictions') if top_k == 1: # standard categorical accuracy top = theano.tensor.argmax(predictions, axis=-1) return theano.tensor.eq(top, targets) else: # top-k accuracy top = theano.tensor.argsort(predictions, axis=-1) # (Theano cannot index with [..., -top_k:], we need to simulate that) top = top[[slice(None) for _ in range(top.ndim - 1)] + [slice(-top_k, None)]] targets = theano.tensor.shape_padaxis(targets, axis=-1) return theano.tensor.any(theano.tensor.eq(top, targets), axis=-1) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/setup.cfg0000644000175000017500000000027613307306052022705 0ustar sinclairssinclairs[aliases] dev = develop easy_install lasagne[testing] [pytest] addopts = -v --doctest-modules --cov=lasagne --cov-report=term-missing --pep8 lasagne/ python_files = test*py Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/.travis.yml0000644000175000017500000000076013307306052023173 0ustar sinclairssinclairslanguage: python sudo: false python: - "2.7" - "3.4" addons: apt: packages: - libblas-dev - liblapack-dev - gfortran before_install: - pip install -U pip install: - travis_wait travis_retry pip install -r requirements-dev.txt - travis_retry pip install python-coveralls - travis_retry python setup.py dev script: py.test --runslow --cov-config=.coveragerc-nogpu after_success: - coveralls cache: - apt - directories: - $HOME/.cache/pip - $HOME/.theano Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/.coveragerc0000644000175000017500000000003513307306052023176 0ustar sinclairssinclairs[run] omit = lasagne/tests/* Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/MANIFEST.in0000644000175000017500000000037013307306052022615 0ustar sinclairssinclairsinclude *.rst include *.txt include LICENSE recursive-include lasagne/tests *.py include .coveragerc recursive-include examples *.py recursive-include docs *.rst conf.py *.css Makefile recursive-exclude * __pycache__ recursive-exclude * *.py[co] Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/setup.py0000644000175000017500000000414413307306052022574 0ustar sinclairssinclairsimport os import re from setuptools import find_packages from setuptools import setup # We need io.open() (Python 3's default open) to specify file encodings import io here = os.path.abspath(os.path.dirname(__file__)) try: # obtain version string from __init__.py # Read ASCII file with builtin open() so __version__ is str in Python 2 and 3 with open(os.path.join(here, 'lasagne', '__init__.py'), 'r') as f: init_py = f.read() version = re.search('__version__ = "(.*)"', init_py).groups()[0] except Exception: version = '' try: # obtain long description from README and CHANGES # Specify encoding to get a unicode type in Python 2 and a str in Python 3 with io.open(os.path.join(here, 'README.rst'), 'r', encoding='utf-8') as f: README = f.read() with io.open(os.path.join(here, 'CHANGES.rst'), 'r', encoding='utf-8') as f: CHANGES = f.read() except IOError: README = CHANGES = '' install_requires = [ 'numpy', # 'Theano', # we require a development version, see requirements.txt ] tests_require = [ 'mock', 'pytest', 'pytest-cov', 'pytest-pep8', ] setup( name="Lasagne", version=version, description="A lightweight library to build and train neural networks " "in Theano", long_description="\n\n".join([README, CHANGES]), classifiers=[ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", 
"Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.4", "Topic :: Scientific/Engineering :: Artificial Intelligence", ], keywords="", author="Lasagne contributors", author_email="lasagne-users@googlegroups.com", url="https://github.com/Lasagne/Lasagne", license="MIT", packages=find_packages(), include_package_data=False, zip_safe=False, install_requires=install_requires, extras_require={ 'testing': tests_require, }, ) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/README.rst0000644000175000017500000001323313307306052022550 0ustar sinclairssinclairs.. image:: https://readthedocs.org/projects/lasagne/badge/ :target: http://lasagne.readthedocs.org/en/latest/ .. image:: https://travis-ci.org/Lasagne/Lasagne.svg :target: https://travis-ci.org/Lasagne/Lasagne .. image:: https://img.shields.io/coveralls/Lasagne/Lasagne.svg :target: https://coveralls.io/r/Lasagne/Lasagne .. image:: https://img.shields.io/badge/license-MIT-blue.svg :target: https://github.com/Lasagne/Lasagne/blob/master/LICENSE .. image:: https://zenodo.org/badge/16974/Lasagne/Lasagne.svg :target: https://zenodo.org/badge/latestdoi/16974/Lasagne/Lasagne Lasagne ======= Lasagne is a lightweight library to build and train neural networks in Theano. Its main features are: * Supports feed-forward networks such as Convolutional Neural Networks (CNNs), recurrent networks including Long Short-Term Memory (LSTM), and any combination thereof * Allows architectures of multiple inputs and multiple outputs, including auxiliary classifiers * Many optimization methods including Nesterov momentum, RMSprop and ADAM * Freely definable cost function and no need to derive gradients due to Theano's symbolic differentiation * Transparent support of CPUs and GPUs due to Theano's expression compiler Its design is governed by `six principles `_: * Simplicity: Be easy to use, easy to understand and easy to extend, to facilitate use in research * Transparency: Do not hide Theano behind abstractions, directly process and return Theano expressions or Python / numpy data types * Modularity: Allow all parts (layers, regularizers, optimizers, ...) to be used independently of Lasagne * Pragmatism: Make common use cases easy, do not overrate uncommon cases * Restraint: Do not obstruct users with features they decide not to use * Focus: "Do one thing and do it well" Installation ------------ In short, you can install a known compatible version of Theano and the latest Lasagne development version via: .. code-block:: bash pip install -r https://raw.githubusercontent.com/Lasagne/Lasagne/master/requirements.txt pip install https://github.com/Lasagne/Lasagne/archive/master.zip For more details and alternatives, please see the `Installation instructions `_. Documentation ------------- Documentation is available online: http://lasagne.readthedocs.org/ For support, please refer to the `lasagne-users mailing list `_. Example ------- .. 
code-block:: python import lasagne import theano import theano.tensor as T # create Theano variables for input and target minibatch input_var = T.tensor4('X') target_var = T.ivector('y') # create a small convolutional neural network from lasagne.nonlinearities import leaky_rectify, softmax network = lasagne.layers.InputLayer((None, 3, 32, 32), input_var) network = lasagne.layers.Conv2DLayer(network, 64, (3, 3), nonlinearity=leaky_rectify) network = lasagne.layers.Conv2DLayer(network, 32, (3, 3), nonlinearity=leaky_rectify) network = lasagne.layers.Pool2DLayer(network, (3, 3), stride=2, mode='max') network = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, 0.5), 128, nonlinearity=leaky_rectify, W=lasagne.init.Orthogonal()) network = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, 0.5), 10, nonlinearity=softmax) # create loss function prediction = lasagne.layers.get_output(network) loss = lasagne.objectives.categorical_crossentropy(prediction, target_var) loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params( network, lasagne.regularization.l2) # create parameter update expressions params = lasagne.layers.get_all_params(network, trainable=True) updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9) # compile training function that updates parameters and returns training loss train_fn = theano.function([input_var, target_var], loss, updates=updates) # train network (assuming you've got some training data in numpy arrays) for epoch in range(100): loss = 0 for input_batch, target_batch in training_data: loss += train_fn(input_batch, target_batch) print("Epoch %d: Loss %g" % (epoch + 1, loss / len(training_data))) # use trained network for predictions test_prediction = lasagne.layers.get_output(network, deterministic=True) predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1)) print("Predicted class for first test input: %r" % predict_fn(test_data[0])) For a fully-functional example, see `examples/mnist.py `_, and check the `Tutorial `_ for in-depth explanations of the same. More examples, code snippets and reproductions of recent research papers are maintained in the separate `Lasagne Recipes `_ repository. Citation -------- If you find Lasagne useful for your scientific work, please consider citing it in resulting publications. We provide a ready-to-use `BibTeX entry for citing Lasagne `_. Development ----------- Lasagne is a work in progress, input is welcome. Please see the `Contribution instructions `_ for details on how you can contribute! Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/examples/0000755000175000017500000000000013307306052022675 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/examples/recurrent.py0000755000175000017500000001527413307306052025274 0ustar sinclairssinclairs#!/usr/bin/env python # -*- coding: utf-8 -*- ''' Recurrent network example. Trains a bidirectional vanilla RNN to output the sum of two numbers in a sequence of random numbers sampled uniformly from [0, 1] based on a separate marker sequence. ''' from __future__ import print_function import numpy as np import theano import theano.tensor as T import lasagne # Min/max sequence length MIN_LENGTH = 50 MAX_LENGTH = 55 # Number of units in the hidden (recurrent) layer N_HIDDEN = 100 # Number of training sequences in each batch N_BATCH = 100 # Optimization learning rate LEARNING_RATE = .001 # All gradients above this will be clipped GRAD_CLIP = 100 # How often should we check the output? 
EPOCH_SIZE = 100 # Number of epochs to train the net NUM_EPOCHS = 10 def gen_data(min_length=MIN_LENGTH, max_length=MAX_LENGTH, n_batch=N_BATCH): ''' Generate a batch of sequences for the "add" task, e.g. the target for the following ``| 0.5 | 0.7 | 0.3 | 0.1 | 0.2 | ... | 0.5 | 0.9 | ... | 0.8 | 0.2 | | 0 | 0 | 1 | 0 | 0 | | 0 | 1 | | 0 | 0 |`` would be 0.3 + .9 = 1.2. This task was proposed in [1]_ and explored in e.g. [2]_. Parameters ---------- min_length : int Minimum sequence length. max_length : int Maximum sequence length. n_batch : int Number of samples in the batch. Returns ------- X : np.ndarray Input to the network, of shape (n_batch, max_length, 2), where the last dimension corresponds to the two sequences shown above. y : np.ndarray Correct output for each sample, shape (n_batch,). mask : np.ndarray A binary matrix of shape (n_batch, max_length) where ``mask[i, j] = 1`` when ``j <= (length of sequence i)`` and ``mask[i, j] = 0`` when ``j > (length of sequence i)``. References ---------- .. [1] Hochreiter, Sepp, and Jürgen Schmidhuber. "Long short-term memory." Neural computation 9.8 (1997): 1735-1780. .. [2] Sutskever, Ilya, et al. "On the importance of initialization and momentum in deep learning." Proceedings of the 30th international conference on machine learning (ICML-13). 2013. ''' # Generate X - we'll fill the last dimension later X = np.concatenate([np.random.uniform(size=(n_batch, max_length, 1)), np.zeros((n_batch, max_length, 1))], axis=-1) mask = np.zeros((n_batch, max_length)) y = np.zeros((n_batch,)) # Compute masks and correct values for n in range(n_batch): # Randomly choose the sequence length length = np.random.randint(min_length, max_length) # Make the mask for this sample 1 within the range of length mask[n, :length] = 1 # Zero out X after the end of the sequence X[n, length:, 0] = 0 # Set the second dimension to 1 at the indices to add X[n, np.random.randint(length/10), 1] = 1 X[n, np.random.randint(length/2, length), 1] = 1 # Multiply and sum the dimensions of X to get the target value y[n] = np.sum(X[n, :, 0]*X[n, :, 1]) # Center the inputs and outputs X -= X.reshape(-1, 2).mean(axis=0) y -= y.mean() return (X.astype(theano.config.floatX), y.astype(theano.config.floatX), mask.astype(theano.config.floatX)) def main(num_epochs=NUM_EPOCHS): print("Building network ...") # First, we build the network, starting with an input layer # Recurrent layers expect input of shape # (batch size, max sequence length, number of features) l_in = lasagne.layers.InputLayer(shape=(N_BATCH, MAX_LENGTH, 2)) # The network also needs a way to provide a mask for each sequence. We'll # use a separate input layer for that. Since the mask only determines # which indices are part of the sequence for each batch entry, they are # supplied as matrices of dimensionality (N_BATCH, MAX_LENGTH) l_mask = lasagne.layers.InputLayer(shape=(N_BATCH, MAX_LENGTH)) # We're using a bidirectional network, which means we will combine two # RecurrentLayers, one with the backwards=True keyword argument. 
# Setting a value for grad_clipping will clip the gradients in the layer # Setting only_return_final=True makes the layers only return their output # for the final time step, which is all we need for this task l_forward = lasagne.layers.RecurrentLayer( l_in, N_HIDDEN, mask_input=l_mask, grad_clipping=GRAD_CLIP, W_in_to_hid=lasagne.init.HeUniform(), W_hid_to_hid=lasagne.init.HeUniform(), nonlinearity=lasagne.nonlinearities.tanh, only_return_final=True) l_backward = lasagne.layers.RecurrentLayer( l_in, N_HIDDEN, mask_input=l_mask, grad_clipping=GRAD_CLIP, W_in_to_hid=lasagne.init.HeUniform(), W_hid_to_hid=lasagne.init.HeUniform(), nonlinearity=lasagne.nonlinearities.tanh, only_return_final=True, backwards=True) # Now, we'll concatenate the outputs to combine them. l_concat = lasagne.layers.ConcatLayer([l_forward, l_backward]) # Our output layer is a simple dense connection, with 1 output unit l_out = lasagne.layers.DenseLayer( l_concat, num_units=1, nonlinearity=lasagne.nonlinearities.tanh) target_values = T.vector('target_output') # lasagne.layers.get_output produces a variable for the output of the net network_output = lasagne.layers.get_output(l_out) # The network output will have shape (n_batch, 1); let's flatten to get a # 1-dimensional vector of predicted values predicted_values = network_output.flatten() # Our cost will be mean-squared error cost = T.mean((predicted_values - target_values)**2) # Retrieve all parameters from the network all_params = lasagne.layers.get_all_params(l_out) # Compute SGD updates for training print("Computing updates ...") updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE) # Theano functions for training and computing cost print("Compiling functions ...") train = theano.function([l_in.input_var, target_values, l_mask.input_var], cost, updates=updates) compute_cost = theano.function( [l_in.input_var, target_values, l_mask.input_var], cost) # We'll use this "validation set" to periodically check progress X_val, y_val, mask_val = gen_data() print("Training ...") try: for epoch in range(num_epochs): for _ in range(EPOCH_SIZE): X, y, m = gen_data() train(X, y, m) cost_val = compute_cost(X_val, y_val, mask_val) print("Epoch {} validation cost = {}".format(epoch, cost_val)) except KeyboardInterrupt: pass if __name__ == '__main__': main() Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/examples/mnist.py0000755000175000017500000003706713307306052024421 0ustar sinclairssinclairs#!/usr/bin/env python """ Usage example employing Lasagne for digit recognition using the MNIST dataset. This example is deliberately structured as a long flat file, focusing on how to use Lasagne, instead of focusing on writing maximally modular and reusable code. It is used as the foundation for the introductory Lasagne tutorial: http://lasagne.readthedocs.org/en/latest/user/tutorial.html More in-depth examples and reproductions of paper results are maintained in a separate repository: https://github.com/Lasagne/Recipes """ from __future__ import print_function import sys import os import time import numpy as np import theano import theano.tensor as T import lasagne # ################## Download and prepare the MNIST dataset ################## # This is just some way of getting the MNIST dataset from an online location # and loading it into numpy arrays. It doesn't involve Lasagne at all. def load_dataset(): # We first define a download function, supporting both Python 2 and 3. 
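    # The location of `urlretrieve` differs between the two: it lives in
    # `urllib` on Python 2 and in `urllib.request` on Python 3.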
if sys.version_info[0] == 2: from urllib import urlretrieve else: from urllib.request import urlretrieve def download(filename, source='http://yann.lecun.com/exdb/mnist/'): print("Downloading %s" % filename) urlretrieve(source + filename, filename) # We then define functions for loading MNIST images and labels. # For convenience, they also download the requested files if needed. import gzip def load_mnist_images(filename): if not os.path.exists(filename): download(filename) # Read the inputs in Yann LeCun's binary format. with gzip.open(filename, 'rb') as f: data = np.frombuffer(f.read(), np.uint8, offset=16) # The inputs are vectors now, we reshape them to monochrome 2D images, # following the shape convention: (examples, channels, rows, columns) data = data.reshape(-1, 1, 28, 28) # The inputs come as bytes, we convert them to float32 in range [0,1]. # (Actually to range [0, 255/256], for compatibility to the version # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.) return data / np.float32(256) def load_mnist_labels(filename): if not os.path.exists(filename): download(filename) # Read the labels in Yann LeCun's binary format. with gzip.open(filename, 'rb') as f: data = np.frombuffer(f.read(), np.uint8, offset=8) # The labels are vectors of integers now, that's exactly what we want. return data # We can now download and read the training and test set images and labels. X_train = load_mnist_images('train-images-idx3-ubyte.gz') y_train = load_mnist_labels('train-labels-idx1-ubyte.gz') X_test = load_mnist_images('t10k-images-idx3-ubyte.gz') y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz') # We reserve the last 10000 training examples for validation. X_train, X_val = X_train[:-10000], X_train[-10000:] y_train, y_val = y_train[:-10000], y_train[-10000:] # We just return all the arrays in order, as expected in main(). # (It doesn't matter how we do this as long as we can read them again.) return X_train, y_train, X_val, y_val, X_test, y_test # ##################### Build the neural network model ####################### # This script supports three types of models. For each one, we define a # function that takes a Theano variable representing the input and returns # the output layer of a neural network model built in Lasagne. def build_mlp(input_var=None): # This creates an MLP of two hidden layers of 800 units each, followed by # a softmax output layer of 10 units. It applies 20% dropout to the input # data and 50% dropout to the hidden layers. 
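    # Layer by layer, the network built below is:
    #   input (None, 1, 28, 28) -> 20% dropout
    #   -> 800-unit dense (rectify) -> 50% dropout
    #   -> 800-unit dense (rectify) -> 50% dropout
    #   -> 10-unit dense (softmax)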
# Input layer, specifying the expected input shape of the network # (unspecified batchsize, 1 channel, 28 rows and 28 columns) and # linking it to the given Theano variable `input_var`, if any: l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var) # Apply 20% dropout to the input data: l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2) # Add a fully-connected layer of 800 units, using the linear rectifier, and # initializing weights with Glorot's scheme (which is the default anyway): l_hid1 = lasagne.layers.DenseLayer( l_in_drop, num_units=800, nonlinearity=lasagne.nonlinearities.rectify, W=lasagne.init.GlorotUniform()) # We'll now add dropout of 50%: l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5) # Another 800-unit layer: l_hid2 = lasagne.layers.DenseLayer( l_hid1_drop, num_units=800, nonlinearity=lasagne.nonlinearities.rectify) # 50% dropout again: l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5) # Finally, we'll add the fully-connected output layer, of 10 softmax units: l_out = lasagne.layers.DenseLayer( l_hid2_drop, num_units=10, nonlinearity=lasagne.nonlinearities.softmax) # Each layer is linked to its incoming layer(s), so we only need to pass # the output layer to give access to a network in Lasagne: return l_out def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2, drop_hidden=.5): # By default, this creates the same network as `build_mlp`, but it can be # customized with respect to the number and size of hidden layers. This # mostly showcases how creating a network in Python code can be a lot more # flexible than a configuration file. Note that to make the code easier, # all the layers are just called `network` -- there is no need to give them # different names if all we return is the last one we created anyway; we # just used different names above for clarity. # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`): network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var) if drop_input: network = lasagne.layers.dropout(network, p=drop_input) # Hidden layers and dropout: nonlin = lasagne.nonlinearities.rectify for _ in range(depth): network = lasagne.layers.DenseLayer( network, width, nonlinearity=nonlin) if drop_hidden: network = lasagne.layers.dropout(network, p=drop_hidden) # Output layer: softmax = lasagne.nonlinearities.softmax network = lasagne.layers.DenseLayer(network, 10, nonlinearity=softmax) return network def build_cnn(input_var=None): # As a third model, we'll create a CNN of two convolution + pooling stages # and a fully-connected hidden layer in front of the output layer. # Input layer, as usual: network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var) # This time we do not apply input dropout, as it tends to work less well # for convolutional layers. # Convolutional layer with 32 kernels of size 5x5. Strided and padded # convolutions are supported as well; see the docstring. network = lasagne.layers.Conv2DLayer( network, num_filters=32, filter_size=(5, 5), nonlinearity=lasagne.nonlinearities.rectify, W=lasagne.init.GlorotUniform()) # Expert note: Lasagne provides alternative convolutional layers that # override Theano's choice of which implementation to use; for details # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html. 
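    # With the 28x28 input and the unpadded 5x5 convolution above, the
    # feature maps have shape (None, 32, 24, 24) at this point; the
    # pooling and convolution layers below reduce them to 12x12, 8x8 and
    # finally 4x4 before the fully-connected part of the network.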
# Max-pooling layer of factor 2 in both dimensions: network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2)) # Another convolution with 32 5x5 kernels, and another 2x2 pooling: network = lasagne.layers.Conv2DLayer( network, num_filters=32, filter_size=(5, 5), nonlinearity=lasagne.nonlinearities.rectify) network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2)) # A fully-connected layer of 256 units with 50% dropout on its inputs: network = lasagne.layers.DenseLayer( lasagne.layers.dropout(network, p=.5), num_units=256, nonlinearity=lasagne.nonlinearities.rectify) # And, finally, the 10-unit output layer with 50% dropout on its inputs: network = lasagne.layers.DenseLayer( lasagne.layers.dropout(network, p=.5), num_units=10, nonlinearity=lasagne.nonlinearities.softmax) return network # ############################# Batch iterator ############################### # This is just a simple helper function iterating over training data in # mini-batches of a particular size, optionally in random order. It assumes # data is available as numpy arrays. For big datasets, you could load numpy # arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your # own custom data iteration function. For small datasets, you can also copy # them to GPU at once for slightly improved performance. This would involve # several changes in the main program, though, and is not demonstrated here. # Notice that this function returns only mini-batches of size `batchsize`. # If the size of the data is not a multiple of `batchsize`, it will not # return the last (remaining) mini-batch. def iterate_minibatches(inputs, targets, batchsize, shuffle=False): assert len(inputs) == len(targets) if shuffle: indices = np.arange(len(inputs)) np.random.shuffle(indices) for start_idx in range(0, len(inputs) - batchsize + 1, batchsize): if shuffle: excerpt = indices[start_idx:start_idx + batchsize] else: excerpt = slice(start_idx, start_idx + batchsize) yield inputs[excerpt], targets[excerpt] # ############################## Main program ################################ # Everything else will be handled in our main program now. We could pull out # more functions to better separate the code, but it wouldn't make it any # easier to read. def main(model='mlp', num_epochs=500): # Load the dataset print("Loading data...") X_train, y_train, X_val, y_val, X_test, y_test = load_dataset() # Prepare Theano variables for inputs and targets input_var = T.tensor4('inputs') target_var = T.ivector('targets') # Create neural network model (depending on first command line parameter) print("Building model and compiling functions...") if model == 'mlp': network = build_mlp(input_var) elif model.startswith('custom_mlp:'): depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',') network = build_custom_mlp(input_var, int(depth), int(width), float(drop_in), float(drop_hid)) elif model == 'cnn': network = build_cnn(input_var) else: print("Unrecognized model type %r." % model) return # Create a loss expression for training, i.e., a scalar objective we want # to minimize (for our multi-class problem, it is the cross-entropy loss): prediction = lasagne.layers.get_output(network) loss = lasagne.objectives.categorical_crossentropy(prediction, target_var) loss = loss.mean() # We could add some weight decay as well here, see lasagne.regularization. # Create update expressions for training, i.e., how to modify the # parameters at each training step. 
Here, we'll use Stochastic Gradient # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more. params = lasagne.layers.get_all_params(network, trainable=True) updates = lasagne.updates.nesterov_momentum( loss, params, learning_rate=0.01, momentum=0.9) # Create a loss expression for validation/testing. The crucial difference # here is that we do a deterministic forward pass through the network, # disabling dropout layers. test_prediction = lasagne.layers.get_output(network, deterministic=True) test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var) test_loss = test_loss.mean() # As a bonus, also create an expression for the classification accuracy: test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX) # Compile a function performing a training step on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: train_fn = theano.function([input_var, target_var], loss, updates=updates) # Compile a second function computing the validation loss and accuracy: val_fn = theano.function([input_var, target_var], [test_loss, test_acc]) # Finally, launch the training loop. print("Starting training...") # We iterate over epochs: for epoch in range(num_epochs): # In each epoch, we do a full pass over the training data: train_err = 0 train_batches = 0 start_time = time.time() for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True): inputs, targets = batch train_err += train_fn(inputs, targets) train_batches += 1 # And a full pass over the validation data: val_err = 0 val_acc = 0 val_batches = 0 for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) val_err += err val_acc += acc val_batches += 1 # Then we print the results for this epoch: print("Epoch {} of {} took {:.3f}s".format( epoch + 1, num_epochs, time.time() - start_time)) print(" training loss:\t\t{:.6f}".format(train_err / train_batches)) print(" validation loss:\t\t{:.6f}".format(val_err / val_batches)) print(" validation accuracy:\t\t{:.2f} %".format( val_acc / val_batches * 100)) # After training, we compute and print the test error: test_err = 0 test_acc = 0 test_batches = 0 for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) test_err += err test_acc += acc test_batches += 1 print("Final results:") print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches)) print(" test accuracy:\t\t{:.2f} %".format( test_acc / test_batches * 100)) # Optionally, you could now dump the network weights to a file like this: # np.savez('model.npz', *lasagne.layers.get_all_param_values(network)) # # And load them again later on like this: # with np.load('model.npz') as f: # param_values = [f['arr_%d' % i] for i in range(len(f.files))] # lasagne.layers.set_all_param_values(network, param_values) if __name__ == '__main__': if ('--help' in sys.argv) or ('-h' in sys.argv): print("Trains a neural network on MNIST using Lasagne.") print("Usage: %s [MODEL [EPOCHS]]" % sys.argv[0]) print() print("MODEL: 'mlp' for a simple Multi-Layer Perceptron (MLP),") print(" 'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' for an MLP") print(" with DEPTH hidden layers of WIDTH units, DROP_IN") print(" input dropout and DROP_HID hidden dropout,") print(" 'cnn' for a simple Convolutional Neural Network (CNN).") print("EPOCHS: number of training epochs to perform (default: 500)") else: kwargs = {} if 
len(sys.argv) > 1: kwargs['model'] = sys.argv[1] if len(sys.argv) > 2: kwargs['num_epochs'] = int(sys.argv[2]) main(**kwargs) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/.github/0000755000175000017500000000000013307306052022417 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/.github/CONTRIBUTING.md0000644000175000017500000000331013307306052024645 0ustar sinclairssinclairs- **If you have a question or need help using Lasagne**, please post on [our mailing list](https://groups.google.com/forum/#!forum/lasagne-users) instead of creating an issue. Make sure to check the [Lasagne documentation](http://lasagne.readthedocs.org/en/latest/) and the [Theano documentation](http://deeplearning.net/software/theano/) first! You can search the mailing list as well to see if your question has come up before. - **If you would like to report a bug**, feel free to open an issue. Please verify first that the problem is not in your own code by reviewing the documentation. If you are able to provide a minimal code example that reproduces the bug, this will greatly speed up the process of tracking down the problem. - **If you would like to contribute**, feel free to open a pull request. Please review our documentation on [what to contribute](http://lasagne.readthedocs.org/en/latest/user/development.html#what-to-contribute) and [how to contribute](http://lasagne.readthedocs.org/en/latest/user/development.html#how-to-contribute). Some contributions may be better suited for our [Recipes repository](https://github.com/Lasagne/Recipes), where we collect examples, tutorials, trained models, utilities and paper reimplementations. Links ----- - Mailing list: https://groups.google.com/forum/#!forum/lasagne-users - Lasagne documentation: http://lasagne.readthedocs.org/en/latest/ - Theano documentation: http://deeplearning.net/software/theano/ - What to contribute: http://lasagne.readthedocs.org/en/latest/user/development.html#what-to-contribute - How to contribute: http://lasagne.readthedocs.org/en/latest/user/development.html#how-to-contribute - Recipes repository: https://github.com/Lasagne/Recipes Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/.github/ISSUE_TEMPLATE.md0000644000175000017500000000266213307306052025132 0ustar sinclairssinclairsBefore submitting your issue, please check these hints! - **If you have a usage question**, please please post on [our mailing list](https://groups.google.com/forum/#!forum/lasagne-users) instead of creating an issue. Make sure to check the [Lasagne documentation](http://lasagne.readthedocs.org/en/latest/) and the [Theano documentation](http://deeplearning.net/software/theano/) first! You can search the mailing list as well to see if your question has come up before. - **If you suspect you have found a bug**, please first try [updating to the bleeding-edge versions of Theano and Lasagne](http://lasagne.readthedocs.io/en/latest/user/installation.html#bleeding-edge-version). It may have been fixed already. If you are not sure whether the problem lies within your code, Theano, or Lasagne, first post on [our mailing list](https://groups.google.com/forum/#!forum/lasagne-users). In any case, try to provide a minimal code example that reproduces the bug, this will greatly speed up the process of tracking down the problem. - **If you have a feature request or idea**, please include a clear description of the use case(s) it would enable, referencing research papers if applicable, and indicate whether you would be willing to implement the feature yourself. 
We are happy to discuss your suggestion, help refining it, and decide upfront whether it would fit the main library or our [Lasagne/Recipes](https://github.com/Lasagne/Recipes). Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/.github/PULL_REQUEST_TEMPLATE.md0000644000175000017500000000255413307306052026226 0ustar sinclairssinclairsBefore submitting your pull request, please check these hints! - If you are not familiar with the github workflow, have a look: https://guides.github.com/introduction/flow/ In particular, note that in order to update your pull request to include any changes we asked for, you just need to push to your branch again. - If your pull request addresses a particular issue from our issue tracker, reference it in your pull request description on github (not the commit message) using the syntax `Closes #123` or `Fixes #123`. Pull request check list: - Install Lasagne in editable mode to be able to run tests locally: http://lasagne.readthedocs.io/en/latest/user/development.html#development-setup - Make sure PEP8 is followed: `python -m pep8 lasagne/` - Make sure the test suite runs through: `python -m py.test` (or, to only run tests that include the substring `foo` in their name: `python -m py.test -k foo`) - At the end of the test run output, check if coverage is at 100%. If not (or not for the files you changed), you will need to add tests covering the code you added. - It is fine to submit a PR without tests to get initial feedback on the implementation, but we cannot merge it without tests. - If you added/changed any documentation, verify that it renders correctly: http://lasagne.readthedocs.io/en/latest/user/development.html#documentation Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/CHANGES.rst0000644000175000017500000000234713307306052022667 0ustar sinclairssinclairsChangelog --------- 0.1 (2015-08-13) ~~~~~~~~~~~~~~~~ First release. * core contributors, in alphabetical order: * Eric Battenberg (@ebattenberg) * Sander Dieleman (@benanne) * Daniel Nouri (@dnouri) * Eben Olson (@ebenolson) * Aäron van den Oord (@avdnoord) * Colin Raffel (@craffel) * Jan Schlüter (@f0k) * Søren Kaae Sønderby (@skaae) * extra contributors, in chronological order: * Daniel Maturana (@dimatura): documentation, cuDNN layers, LRN * Jonas Degrave (@317070): get_all_param_values() fix * Jack Kelly (@JackKelly): help with recurrent layers * Gábor Takács (@takacsg84): support broadcastable parameters in lasagne.updates * Diogo Moitinho de Almeida (@diogo149): MNIST example fixes * Brian McFee (@bmcfee): MaxPool2DLayer fix * Martin Thoma (@MartinThoma): documentation * Jeffrey De Fauw (@JeffreyDF): documentation, ADAM fix * Michael Heilman (@mheilman): NonlinearityLayer, lasagne.random * Gregory Sanders (@instagibbs): documentation fix * Jon Crall (@erotemic): check for non-positive input shapes * Hendrik Weideman (@hjweide): set_all_param_values() test, MaxPool2DCCLayer fix * Kashif Rasul (@kashif): ADAM simplification * Peter de Rivaz (@peterderivaz): documentation fix Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/0000755000175000017500000000000013307306052022007 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/Makefile0000644000175000017500000001515613307306052023457 0ustar sinclairssinclairs# Makefile for Sphinx documentation # # You can set these variables from the command line. 
SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " xml to make Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." 
qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/lasagne.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/lasagne.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/lasagne" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/lasagne" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/user/0000755000175000017500000000000013307306052022765 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/user/installation.rst0000644000175000017500000002204613307306052026224 0ustar sinclairssinclairs.. 
_installation: ============ Installation ============ Lasagne has a couple of prerequisites that need to be installed first, but it is not very picky about versions. The single exception is Theano: Due to its tight coupling to Theano, you will have to install a recent version of Theano (usually more recent than the latest official release!) fitting the version of Lasagne you choose to install. Most of the instructions below assume you are running a Linux or Mac system, but are otherwise very generic. For detailed step-by-step instructions for specific platforms including Windows, check our `From Zero to Lasagne `_ guides. If you run into any trouble, please check the `Theano installation instructions `_ which cover installing the prerequisites for a range of operating systems, or ask for help on `our mailing list `_. Prerequisites ============= Python + pip ------------ Lasagne currently requires Python 2.7 or 3.4 to run. Please install Python via the package manager of your operating system if it is not included already. Python includes ``pip`` for installing additional modules that are not shipped with your operating system, or shipped in an old version, and we will make use of it below. We recommend installing these modules into your home directory via ``--user``, or into a `virtual environment `_ via ``virtualenv``. C compiler ---------- Theano requires a working C compiler, and numpy/scipy require a compiler as well if you install them via ``pip``. On Linux, the default compiler is usually ``gcc``, and on Mac OS, it's ``clang``. Again, please install them via the package manager of your operating system. numpy/scipy + BLAS ------------------ Lasagne requires numpy of version 1.6.2 or above, and Theano also requires scipy 0.11 or above. Numpy/scipy rely on a BLAS library to provide fast linear algebra routines. They will work fine without one, but a lot slower, so it is worth getting this right (but this is less important if you plan to use a GPU). If you install numpy and scipy via your operating system's package manager, they should link to the BLAS library installed in your system. If you install numpy and scipy via ``pip install numpy`` and ``pip install scipy``, make sure to have development headers for your BLAS library installed (e.g., the ``libopenblas-dev`` package on Debian/Ubuntu) while running the installation command. Please refer to the `numpy/scipy build instructions `_ if in doubt. Theano ------ The version to install depends on the Lasagne version you choose, so this will be handled below. Stable Lasagne release ====================== Lasagne 0.1 requires a more recent version of Theano than the one available on PyPI. To install a version that is known to work, run the following command: .. code-block:: bash pip install -r https://raw.githubusercontent.com/Lasagne/Lasagne/v0.1/requirements.txt .. warning:: An even more recent version of Theano will often work as well, but at the time of writing, a simple ``pip install Theano`` will give you a version that is too old. To install release 0.1 of Lasagne from PyPI, run the following command: .. code-block:: bash pip install Lasagne==0.1 If you do not use ``virtualenv``, add ``--user`` to both commands to install into your home directory instead. To upgrade from an earlier installation, add ``--upgrade``. Bleeding-edge version ===================== The latest development version of Lasagne usually works fine with the latest development version of Theano. To install both, run the following commands: .. 
code-block:: bash pip install --upgrade https://github.com/Theano/Theano/archive/master.zip pip install --upgrade https://github.com/Lasagne/Lasagne/archive/master.zip Again, add ``--user`` if you want to install to your home directory instead. .. _lasagne-development-install: Development installation ======================== Alternatively, you can install Lasagne (and optionally Theano) from source, in a way that any changes to your local copy of the source tree take effect without requiring a reinstall. This is often referred to as *editable* or *development* mode. Firstly, you will need to obtain a copy of the source tree: .. code-block:: bash git clone https://github.com/Lasagne/Lasagne.git It will be cloned to a subdirectory called ``Lasagne``. Make sure to place it in some permanent location, as for an *editable* installation, Python will import the module directly from this directory and not copy over the files. Enter the directory and install the known good version of Theano: .. code-block:: bash cd Lasagne pip install -r requirements.txt Alternatively, install the bleeding-edge version of Theano as described in the previous section. To install the Lasagne package itself, in editable mode, run: .. code-block:: bash pip install --editable . As always, add ``--user`` to install it to your home directory instead. **Optional**: If you plan to contribute to Lasagne, you will need to fork the Lasagne repository on GitHub. This will create a repository under your user account. Update your local clone to refer to the official repository as ``upstream``, and your personal fork as ``origin``: .. code-block:: bash git remote rename origin upstream git remote add origin https://github.com//Lasagne.git If you set up an `SSH key `_, use the SSH clone URL instead: ``git@github.com:/Lasagne.git``. You can now use this installation to develop features and send us pull requests on GitHub, see :doc:`development`! GPU support =========== Thanks to Theano, Lasagne transparently supports training your networks on a GPU, which may be 10 to 50 times faster than training them on a CPU. Currently, this requires an NVIDIA GPU with CUDA support, and some additional software for Theano to use it. CUDA ---- Install the latest CUDA Toolkit and possibly the corresponding driver available from NVIDIA: https://developer.nvidia.com/cuda-downloads Closely follow the *Getting Started Guide* linked underneath the download table to be sure you don't mess up your system by installing conflicting drivers. After installation, make sure ``/usr/local/cuda/bin`` is in your ``PATH``, so ``nvcc --version`` works. Also make sure ``/usr/local/cuda/lib64`` is in your ``LD_LIBRARY_PATH``, so the toolkit libraries can be found. Theano ------ If CUDA is set up correctly, the following should print some information on your GPU (the first CUDA-capable GPU in your system if you have multiple ones): .. code-block:: bash THEANO_FLAGS=device=gpu python -c "import theano; print(theano.sandbox.cuda.device_properties(0))" To configure Theano to use the GPU by default, create a file ``.theanorc`` directly in your home directory, with the following contents: .. code-block:: none [global] floatX = float32 device = gpu Optionally add ``allow_gc = False`` for some extra performance at the expense of (sometimes substantially) higher GPU memory usage. If you run into problems, please check Theano's instructions for `Using the GPU `_. 
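As a quick sanity check (a minimal sketch, assuming the ``.theanorc`` shown
above is in place), you can confirm from Python that Theano picked up the
settings:

.. code-block:: python

    import theano

    # Both values are read from ~/.theanorc when Theano is imported.
    print(theano.config.device)  # should print 'gpu'
    print(theano.config.floatX)  # should print 'float32'
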
cuDNN ----- NVIDIA provides a library for common neural network operations that especially speeds up Convolutional Neural Networks (CNNs). Again, it can be obtained from NVIDIA (after registering as a developer): https://developer.nvidia.com/cudnn Note that it requires a reasonably modern GPU with Compute Capability 3.0 or higher; see `NVIDIA's list of CUDA GPUs `_. To install it, copy the ``*.h`` files to ``/usr/local/cuda/include`` and the ``lib*`` files to ``/usr/local/cuda/lib64``. To check whether it is found by Theano, run the following command: .. code-block:: bash python -c "from theano.sandbox.cuda.dnn import dnn_available as d; print(d() or d.msg)" It will print ``True`` if everything is fine, or an error message otherwise. There are no additional steps required for Theano to make use of cuDNN. Docker ====== Instead of manually installing Theano and Lasagne on your machines as described above, you may want to use a pre-made `Docker `_ image: `Lasagne Docker (CPU) `_ or `Lasagne Docker (CUDA) `_. These are updated on a weekly basis with bleeding-edge builds of Theano and Lasagne. Examples of running bash in a Docker container are as follows: .. code-block:: bash sudo docker run -it kaixhin/lasagne sudo nvidia-docker run -it kaixhin/cuda-lasagne:7.0 For a guide to Docker, see the `official docs `_. CUDA support requires `NVIDIA Docker `_. For more details on how to use the Lasagne Docker images, consult the `source project `_. Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/user/development.rst0000644000175000017500000002166613307306052026054 0ustar sinclairssinclairsDevelopment =========== The Lasagne project was started by Sander Dieleman in September 2014. It is developed by a core team of eight people (in alphabetical order: `Eric Battenberg `_, `Sander Dieleman `_, `Daniel Nouri `_, `Eben Olson `_, `Aäron van den Oord `_, `Colin Raffel `_, `Jan Schlüter `_, `Søren Kaae Sønderby `_) and `numerous additional contributors `_ on GitHub: https://github.com/Lasagne/Lasagne As an open-source project by researchers for researchers, we highly welcome contributions! Every bit helps and will be credited. .. _lasagne-philosopy: Philosophy ---------- Lasagne grew out of a need to combine the flexibility of Theano with the availability of the right building blocks for training neural networks. Its development is guided by a number of design goals: * **Simplicity**: Be easy to use, easy to understand and easy to extend, to facilitate use in research. Interfaces should be kept small, with as few classes and methods as possible. Every added abstraction and feature should be carefully scrutinized, to determine whether the added complexity is justified. * **Transparency**: Do not hide Theano behind abstractions, directly process and return Theano expressions or Python / numpy data types. Try to rely on Theano's functionality where possible, and follow Theano's conventions. * **Modularity**: Allow all parts (layers, regularizers, optimizers, ...) to be used independently of Lasagne. Make it easy to use components in isolation or in conjunction with other frameworks. * **Pragmatism**: Make common use cases easy, do not overrate uncommon cases. Ideally, everything should be possible, but common use cases shouldn't be made more difficult just to cater for exotic ones. * **Restraint**: Do not obstruct users with features they decide not to use. Both in using and in extending components, it should be possible for users to be fully oblivious to features they do not need. 
* **Focus**: "Do one thing and do it well". Do not try to provide a library for everything to do with deep learning. What to contribute ------------------ Give feedback ~~~~~~~~~~~~~ To send us general feedback, questions or ideas for improvement, please post on `our mailing list`_. If you have a very concrete feature proposal, add it to the `issue tracker on GitHub`_: * Explain how it would work, and link to a scientific paper if applicable. * Keep the scope as narrow as possible, to make it easier to implement. Report bugs ~~~~~~~~~~~ Report bugs at the `issue tracker on GitHub`_. If you are reporting a bug, please include: * your Lasagne and Theano version. * steps to reproduce the bug, ideally reduced to a few Python commands. * the results you obtain, and the results you expected instead. If you are unsure whether the behavior you experience is a bug, or if you are unsure whether it is related to Lasagne or Theano, please just ask on `our mailing list`_ first. Fix bugs ~~~~~~~~ Look through the GitHub issues for bug reports. Anything tagged with "bug" is open to whoever wants to implement it. If you discover a bug in Lasagne you can fix yourself, by all means feel free to just implement a fix and not report it first. Implement features ~~~~~~~~~~~~~~~~~~ Look through the GitHub issues for feature proposals. Anything tagged with "feature" or "enhancement" is open to whoever wants to implement it. If you have a feature in mind you want to implement yourself, please note that Lasagne has a fairly narrow focus and we strictly follow a set of :ref:`design principles `, so we cannot guarantee upfront that your code will be included. Please do not hesitate to just propose your idea in a GitHub issue or on the mailing list first, so we can discuss it and/or guide you through the implementation. Write documentation ~~~~~~~~~~~~~~~~~~~ Whenever you find something not explained well, misleading, glossed over or just wrong, please update it! The *Edit on GitHub* link on the top right of every documentation page and the *[source]* link for every documented entity in the API reference will help you to quickly locate the origin of any text. How to contribute ----------------- Edit on GitHub ~~~~~~~~~~~~~~ As a very easy way of just fixing issues in the documentation, use the *Edit on GitHub* link on the top right of a documentation page or the *[source]* link of an entity in the API reference to open the corresponding source file in GitHub, then click the *Edit this file* link to edit the file in your browser and send us a Pull Request. All you need for this is a free GitHub account. For any more substantial changes, please follow the steps below to setup Lasagne for development. Development setup ~~~~~~~~~~~~~~~~~ First, follow the instructions for performing a development installation of Lasagne (including forking on GitHub): :ref:`lasagne-development-install` To be able to run the tests and build the documentation locally, install additional requirements with: ``pip install -r requirements-dev.txt`` (adding ``--user`` if you want to install to your home directory instead). If you use the bleeding-edge version of Theano, then instead of running that command, just use ``pip install`` to manually install all dependencies listed in ``requirements-dev.txt`` with their correct versions; otherwise it will attempt to downgrade Theano to the known good version in ``requirements.txt``. Documentation ~~~~~~~~~~~~~ The documentation is generated with `Sphinx `_. 
To build it locally, run the following commands: .. code:: bash cd docs make html Afterwards, open ``docs/_build/html/index.html`` to view the documentation as it would appear on `readthedocs `_. If you changed a lot and seem to get misleading error messages or warnings, run ``make clean html`` to force Sphinx to recreate all files from scratch. When writing docstrings, follow existing documentation as much as possible to ensure consistency throughout the library. For additional information on the syntax and conventions used, please refer to the following documents: * `reStructuredText Primer `_ * `Sphinx reST markup constructs `_ * `A Guide to NumPy/SciPy Documentation `_ Testing ~~~~~~~ Lasagne has a code coverage of 100%, which has proven very helpful in the past, but also creates some duties: * Whenever you change any code, you should test whether it breaks existing features by just running the test suite. The test suite will also be run by `Travis `_ for any Pull Request to Lasagne. * Any code you add needs to be accompanied by tests ensuring that nobody else breaks it in future. `Coveralls `_ will check whether the code coverage stays at 100% for any Pull Request to Lasagne. * Every bug you fix indicates a missing test case, so a proposed bug fix should come with a new test that fails without your fix. To run the full test suite, just do .. code:: bash py.test Testing will take over 5 minutes for the first run, but less than a minute for subsequent runs when Theano can reuse compiled code. It will end with a code coverage report specifying which code lines are not covered by tests, if any. Furthermore, it will list any failed tests, and failed `PEP8 `_ checks. To only run tests matching a certain name pattern, use the ``-k`` command line switch, e.g., ``-k pool`` will run the pooling layer tests only. To land in a ``pdb`` debug prompt on a failure to inspect it more closely, use the ``--pdb`` switch. Finally, for a loop-on-failing mode, do ``pip install pytest-xdist`` and run ``py.test -f``. This will pause after the run, wait for any source file to change and run all previously failing tests again. Sending Pull Requests ~~~~~~~~~~~~~~~~~~~~~ When you're satisfied with your addition, the tests pass and the documentation looks good without any markup errors, commit your changes to a new branch, push that branch to your fork and send us a Pull Request via GitHub's web interface. All these steps are nicely explained on GitHub: https://guides.github.com/introduction/flow/ When filing your Pull Request, please include a description of what it does, to help us reviewing it. If it is fixing an open issue, say, issue #123, add *Fixes #123*, *Resolves #123* or *Closes #123* to the description text, so GitHub will close it when your request is merged. .. _issue tracker on GitHub: https://github.com/Lasagne/Lasagne/issues .. _our mailing list: https://groups.google.com/forum/#!forum/lasagne-users Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/user/tutorial.rst0000644000175000017500000006013213307306052025364 0ustar sinclairssinclairs.. _tutorial: ======== Tutorial ======== This tutorial will walk you through building a handwritten digits classifier using the MNIST dataset, arguably the "Hello World" of neural networks. More tutorials and examples can be found in the `Lasagne Recipes`_ repository. Before we start =============== The tutorial assumes that you are somewhat familiar with neural networks and Theano (the library which Lasagne is built on top of). 
You can try to learn both at once from the `Deeplearning Tutorial`_. For a more slow-paced introduction to artificial neural networks, we recommend `Convolutional Neural Networks for Visual Recognition`_ by Andrej Karpathy et al., `Neural Networks and Deep Learning`_ by Michael Nielsen or a standard text book such as "Machine Learning" by Tom Mitchell. To learn more about Theano, have a look at the `Theano tutorial`_. You will not need all of it, but a basic understanding of how Theano works is required to be able to use Lasagne. If you're new to Theano, going through that tutorial up to (and including) "More Examples" should get you covered! `Graph Structures`_ is a good extra read if you're curious about its inner workings. Run the MNIST example ===================== In this first part of the tutorial, we will just run the MNIST example that's included in the source distribution of Lasagne. We assume that you have already run through the :ref:`installation`. If you haven't done so already, get a copy of the source tree of Lasagne, and navigate to the folder in a terminal window. Enter the ``examples`` folder and run the ``mnist.py`` example script: .. code-block:: bash cd examples python mnist.py If everything is set up correctly, you will get an output like the following: .. code-block:: text Using gpu device 0: GeForce GT 640 Loading data... Downloading train-images-idx3-ubyte.gz Downloading train-labels-idx1-ubyte.gz Downloading t10k-images-idx3-ubyte.gz Downloading t10k-labels-idx1-ubyte.gz Building model and compiling functions... Starting training... Epoch 1 of 500 took 1.858s training loss: 1.233348 validation loss: 0.405868 validation accuracy: 88.78 % Epoch 2 of 500 took 1.845s training loss: 0.571644 validation loss: 0.310221 validation accuracy: 91.24 % Epoch 3 of 500 took 1.845s training loss: 0.471582 validation loss: 0.265931 validation accuracy: 92.35 % Epoch 4 of 500 took 1.847s training loss: 0.412204 validation loss: 0.238558 validation accuracy: 93.05 % ... The example script allows you to try three different models, selected via the first command line argument. Run the script with ``python mnist.py --help`` for more information and feel free to play around with it some more before we have a look at the implementation. Understand the MNIST example ============================ Let's now investigate what's needed to make that happen! To follow along, open up the source code in your favorite editor (or online: `mnist.py`_). Preface ------- The first thing you might notice is that besides Lasagne, we also import numpy and Theano: .. code-block:: python import numpy as np import theano import theano.tensor as T import lasagne While Lasagne is built on top of Theano, it is meant as a supplement helping with some tasks, not as a replacement. You will always mix Lasagne with some vanilla Theano code. Loading data ------------ The first piece of code defines a function ``load_dataset()``. Its purpose is to download the MNIST dataset (if it hasn't been downloaded yet) and return it in the form of regular numpy arrays. There is no Lasagne involved at all, so for the purpose of this tutorial, we can regard it as: .. code-block:: python def load_dataset(): ... return X_train, y_train, X_val, y_val, X_test, y_test ``X_train.shape`` is ``(50000, 1, 28, 28)``, to be interpreted as: 50,000 images of 1 channel, 28 rows and 28 columns each. Note that the number of channels is 1 because we have monochrome input. 
Color images would have 3 channels, spectrograms also would have a single channel. ``y_train.shape`` is simply ``(50000,)``, that is, it is a vector the same length of ``X_train`` giving an integer class label for each image -- namely, the digit between 0 and 9 depicted in the image (according to the human annotator who drew that digit). Building the model ------------------ This is where Lasagne steps in. It allows you to define an arbitrarily structured neural network by creating and stacking or merging layers. Since every layer knows its immediate incoming layers, the output layer (or output layers) of a network double as a handle to the network as a whole, so usually this is the only thing we will pass on to the rest of the code. As mentioned above, ``mnist.py`` supports three types of models, and we implement that via three easily exchangeable functions of the same interface. First, we'll define a function that creates a Multi-Layer Perceptron (MLP) of a fixed architecture, explaining all the steps in detail. We'll then present a function generating an MLP of a custom architecture. Finally, we'll show how to create a Convolutional Neural Network (CNN). Multi-Layer Perceptron (MLP) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The first function, ``build_mlp()``, creates an MLP of two hidden layers of 800 units each, followed by a softmax output layer of 10 units. It applies 20% dropout to the input data and 50% dropout to the hidden layers. It is similar, but not fully equivalent to the smallest MLP in [Hinton2012]_ (that paper uses different nonlinearities, weight initialization and training). The foundation of each neural network in Lasagne is an :class:`InputLayer ` instance (or multiple of those) representing the input data that will subsequently be fed to the network. Note that the ``InputLayer`` is not tied to any specific data yet, but only holds the shape of the data that will be passed to the network. In addition, it creates or can be linked to a `Theano variable `_ that will represent the network input in the `Theano graph `_ we'll build from the network later. Thus, our function starts like this: .. code-block:: python def build_mlp(input_var=None): l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var) The four numbers in the shape tuple represent, in order: ``(batchsize, channels, rows, columns)``. Here we've set the batchsize to ``None``, which means the network will accept input data of arbitrary batchsize after compilation. If you know the batchsize beforehand and do not need this flexibility, you should give the batchsize here -- especially for convolutional layers, this can allow Theano to apply some optimizations. ``input_var`` denotes the Theano variable we want to link the network's input layer to. If it is omitted (or set to ``None``), the layer will just create a suitable variable itself, but it can be handy to link an existing variable to the network at construction time -- especially if you're creating networks of multiple input layers. Here, we link it to a variable given as an argument to the ``build_mlp()`` function. Before adding the first hidden layer, we'll apply 20% dropout to the input data. This is realized via a :class:`DropoutLayer ` instance: .. code-block:: python l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2) Note that the first constructor argument is the incoming layer, such that ``l_in_drop`` is now stacked on top of ``l_in``. 
All layers work this way, except for layers that merge multiple inputs: those accept a list of incoming layers as their first constructor argument instead. We'll proceed with the first fully-connected hidden layer of 800 units. Note that when stacking a :class:`DenseLayer ` on higher-order input tensors, they will be flattened implicitly so we don't need to care about that. In this case, the input will be flattened from 1x28x28 images to 784-dimensional vectors. .. code-block:: python l_hid1 = lasagne.layers.DenseLayer( l_in_drop, num_units=800, nonlinearity=lasagne.nonlinearities.rectify, W=lasagne.init.GlorotUniform()) Again, the first constructor argument means that we're stacking ``l_hid1`` on top of ``l_in_drop``. ``num_units`` simply gives the number of units for this fully-connected layer. ``nonlinearity`` takes a nonlinearity function, several of which are defined in :mod:`lasagne.nonlinearities`. Here we've chosen the linear rectifier, so we'll obtain ReLUs. Finally, :class:`lasagne.init.GlorotUniform()` gives the initializer for the weight matrix ``W``. This particular initializer samples weights from a uniform distribution of a carefully chosen range. Other initializers are available in :mod:`lasagne.init`, and alternatively, ``W`` could also have been initialized from a Theano shared variable or numpy array of the correct shape (784x800 in this case, as the input to this layer has 1*28*28=784 dimensions). Note that ``lasagne.init.GlorotUniform()`` is the default, so we'll omit it from here -- we just wanted to highlight that there is a choice. We'll now add dropout of 50%, another 800-unit dense layer and 50% dropout again: .. code-block:: python l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5) l_hid2 = lasagne.layers.DenseLayer( l_hid1_drop, num_units=800, nonlinearity=lasagne.nonlinearities.rectify) l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5) Finally, we'll add the fully-connected output layer. The main difference is that it uses the softmax nonlinearity, as we're planning to solve a 10-class classification problem with this network. .. code-block:: python l_out = lasagne.layers.DenseLayer( l_hid2_drop, num_units=10, nonlinearity=lasagne.nonlinearities.softmax) As mentioned above, each layer is linked to its incoming layer(s), so we only need the output layer(s) to access a network in Lasagne: .. code-block:: python return l_out Custom MLP ^^^^^^^^^^ The second function has a slightly more extensive signature: .. code-block:: python def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2, drop_hidden=.5): By default, it creates the same network as ``build_mlp()`` described above, but it can be customized with respect to the number and size of hidden layers, as well as the amount of input and hidden dropout. This demonstrates how creating a network in Python code can be a lot more flexible than a configuration file. See for yourself: .. 
code-block:: python # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`): network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var) if drop_input: network = lasagne.layers.dropout(network, p=drop_input) # Hidden layers and dropout: nonlin = lasagne.nonlinearities.rectify for _ in range(depth): network = lasagne.layers.DenseLayer( network, width, nonlinearity=nonlin) if drop_hidden: network = lasagne.layers.dropout(network, p=drop_hidden) # Output layer: softmax = lasagne.nonlinearities.softmax network = lasagne.layers.DenseLayer(network, 10, nonlinearity=softmax) return network With two ``if`` clauses and a ``for`` loop, this network definition allows varying the architecture in a way that would be impossible for a ``.yaml`` file in `Pylearn2`_ or a ``.cfg`` file in `cuda-convnet`_. Note that to make the code easier, all the layers are just called ``network`` here -- there is no need to give them different names if all we return is the last one we created anyway; we just used different names before for clarity. Convolutional Neural Network (CNN) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Finally, the ``build_cnn()`` function creates a CNN of two convolution and pooling stages, a fully-connected hidden layer and a fully-connected output layer. The function begins like the others: .. code-block:: python def build_cnn(input_var=None): network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var) We don't apply dropout to the inputs, as this tends to work less well for convolutional layers. Instead of a :class:`DenseLayer `, we now add a :class:`Conv2DLayer ` with 32 filters of size 5x5 on top: .. code-block:: python network = lasagne.layers.Conv2DLayer( network, num_filters=32, filter_size=(5, 5), nonlinearity=lasagne.nonlinearities.rectify, W=lasagne.init.GlorotUniform()) The nonlinearity and weight initializer can be given just as for the ``DenseLayer`` (and again, ``GlorotUniform()`` is the default, we'll omit it from now). Strided and padded convolutions are supported as well; see the :class:`Conv2DLayer ` docstring. .. note:: For experts: ``Conv2DLayer`` will create a convolutional layer using ``T.nnet.conv2d``, Theano's default convolution. On compilation for GPU, Theano replaces this with a `cuDNN`_-based implementation if available, otherwise falls back to a gemm-based implementation. For details on this, please see the `Theano convolution documentation`_. Lasagne also provides convolutional layers directly enforcing a specific implementation: :class:`lasagne.layers.dnn.Conv2DDNNLayer` to enforce cuDNN, :class:`lasagne.layers.corrmm.Conv2DMMLayer` to enforce the gemm-based one, :class:`lasagne.layers.cuda_convnet.Conv2DCCLayer` for Krizhevsky's `cuda-convnet`_. We then apply max-pooling of factor 2 in both dimensions, using a :class:`MaxPool2DLayer ` instance: .. code-block:: python network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2)) We add another convolution and pooling stage like the ones before: .. code-block:: python network = lasagne.layers.Conv2DLayer( network, num_filters=32, filter_size=(5, 5), nonlinearity=lasagne.nonlinearities.rectify) network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2)) Then a fully-connected layer of 256 units with 50% dropout on its inputs (using the :class:`lasagne.layers.dropout` shortcut directly inline): .. 
code-block:: python network = lasagne.layers.DenseLayer( lasagne.layers.dropout(network, p=.5), num_units=256, nonlinearity=lasagne.nonlinearities.rectify) And finally a 10-unit softmax output layer, again with 50% dropout: .. code-block:: python network = lasagne.layers.DenseLayer( lasagne.layers.dropout(network, p=.5), num_units=10, nonlinearity=lasagne.nonlinearities.softmax) return network Training the model ------------------ The remaining part of the ``mnist.py`` script copes with setting up and running a training loop over the MNIST dataset. Dataset iteration ^^^^^^^^^^^^^^^^^ It first defines a short helper function for synchronously iterating over two numpy arrays of input data and targets, respectively, in mini-batches of a given number of items. For the purpose of this tutorial, we can shorten it to: .. code-block:: python def iterate_minibatches(inputs, targets, batchsize, shuffle=False): if shuffle: ... for ...: yield inputs[...], targets[...] All that's relevant is that it is a generator function that serves one batch of inputs and targets at a time until the given dataset (in ``inputs`` and ``targets``) is exhausted, either in sequence or in random order. Below we will plug this function into our training loop, validation loop and test loop. Preparation ^^^^^^^^^^^ Let's now focus on the ``main()`` function. A bit simplified, it begins like this: .. code-block:: python # Load the dataset X_train, y_train, X_val, y_val, X_test, y_test = load_dataset() # Prepare Theano variables for inputs and targets input_var = T.tensor4('inputs') target_var = T.ivector('targets') # Create neural network model network = build_mlp(input_var) The first line loads the inputs and targets of the MNIST dataset as numpy arrays, split into training, validation and test data. The next two statements define symbolic Theano variables that will represent a mini-batch of inputs and targets in all the Theano expressions we will generate for network training and inference. They are not tied to any data yet, but their dimensionality and data type is fixed already and matches the actual inputs and targets we will process later. Finally, we call one of the three functions for building the Lasagne network, depending on the first command line argument -- we've just removed command line handling here for clarity. Note that we hand the symbolic input variable to ``build_mlp()`` so it will be linked to the network's input layer. Loss and update expressions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Continuing, we create a loss expression to be minimized in training: .. code-block:: python prediction = lasagne.layers.get_output(network) loss = lasagne.objectives.categorical_crossentropy(prediction, target_var) loss = loss.mean() The first step generates a Theano expression for the network output given the input variable linked to the network's input layer(s). The second step defines a Theano expression for the categorical cross-entropy loss between said network output and the targets. Finally, as we need a scalar loss, we simply take the mean over the mini-batch. Depending on the problem you are solving, you will need different loss functions, see :mod:`lasagne.objectives` for more. Having the model and the loss function defined, we create update expressions for training the network. An update expression describes how to change the trainable parameters of the network at each presented mini-batch. 
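To make this concrete, here is a minimal hand-written sketch of such updates
using plain vanilla SGD -- purely illustrative, and not what ``mnist.py``
actually does:

.. code-block:: python

    # Hypothetical hand-rolled SGD: move every trainable parameter a small
    # step against its gradient. The actual script uses lasagne.updates.
    params = lasagne.layers.get_all_params(network, trainable=True)
    grads = T.grad(loss, params)
    updates = [(p, p - 0.01 * g) for p, g in zip(params, grads)]
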
We will use Stochastic Gradient Descent (SGD) with Nesterov momentum here, but the :mod:`lasagne.updates` module offers several others you can plug in instead: .. code-block:: python params = lasagne.layers.get_all_params(network, trainable=True) updates = lasagne.updates.nesterov_momentum( loss, params, learning_rate=0.01, momentum=0.9) The first step collects all Theano ``SharedVariable`` instances making up the trainable parameters of the layer, and the second step generates an update expression for each parameter. For monitoring progress during training, after each epoch, we evaluate the network on the validation set. We need a slightly different loss expression for that: .. code-block:: python test_prediction = lasagne.layers.get_output(network, deterministic=True) test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var) test_loss = test_loss.mean() The crucial difference is that we pass ``deterministic=True`` to the :func:`get_output ` call. This causes all nondeterministic layers to switch to a deterministic implementation, so in our case, it disables the dropout layers. As an additional monitoring quantity, we create an expression for the classification accuracy: .. code-block:: python test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX) It also builds on the deterministic ``test_prediction`` expression. Compilation ^^^^^^^^^^^ Equipped with all the necessary Theano expressions, we're now ready to compile a function performing a training step: .. code-block:: python train_fn = theano.function([input_var, target_var], loss, updates=updates) This tells Theano to generate and compile a function taking two inputs -- a mini-batch of images and a vector of corresponding targets -- and returning a single output: the training loss. Additionally, each time it is invoked, it applies all parameter updates in the ``updates`` dictionary, thus performing a gradient descent step with Nesterov momentum. For validation, we compile a second function: .. code-block:: python val_fn = theano.function([input_var, target_var], [test_loss, test_acc]) This one also takes a mini-batch of images and targets, then returns the (deterministic) loss and classification accuracy, not performing any updates. Training loop ^^^^^^^^^^^^^ We're finally ready to write the training loop. In essence, we just need to do the following: .. code-block:: python for epoch in range(num_epochs): for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True): inputs, targets = batch train_fn(inputs, targets) This uses our dataset iteration helper function to iterate over the training data in random order, in mini-batches of 500 items each, for ``num_epochs`` epochs, and calls the training function we compiled to perform an update step of the network parameters. But to be able to monitor the training progress, we capture the training loss, compute the validation loss and print some information to the console every time an epoch finishes: .. 
code-block:: python for epoch in range(num_epochs): # In each epoch, we do a full pass over the training data: train_err = 0 train_batches = 0 start_time = time.time() for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True): inputs, targets = batch train_err += train_fn(inputs, targets) train_batches += 1 # And a full pass over the validation data: val_err = 0 val_acc = 0 val_batches = 0 for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) val_err += err val_acc += acc val_batches += 1 # Then we print the results for this epoch: print("Epoch {} of {} took {:.3f}s".format( epoch + 1, num_epochs, time.time() - start_time)) print(" training loss:\t\t{:.6f}".format(train_err / train_batches)) print(" validation loss:\t\t{:.6f}".format(val_err / val_batches)) print(" validation accuracy:\t\t{:.2f} %".format( val_acc / val_batches * 100)) At the very end, we re-use the ``val_fn()`` function to compute the loss and accuracy on the test set, finishing the script. Where to go from here ===================== This finishes our introductory tutorial. For more information on what you can do with Lasagne's layers, just continue reading through :doc:`layers` and :doc:`custom_layers`. More tutorials, examples and code snippets can be found in the `Lasagne Recipes`_ repository. Finally, the reference lists and explains all layers (:mod:`lasagne.layers`), weight initializers (:mod:`lasagne.init`), nonlinearities (:mod:`lasagne.nonlinearities`), loss expressions (:mod:`lasagne.objectives`), training methods (:mod:`lasagne.updates`) and regularizers (:mod:`lasagne.regularization`) included in the library, and should also make it simple to create your own. .. _Lasagne Recipes: https://github.com/Lasagne/Recipes .. _Deeplearning Tutorial: http://deeplearning.net/tutorial/ .. _Convolutional Neural Networks for Visual Recognition: http://cs231n.github.io/ .. _Neural Networks and Deep Learning: http://neuralnetworksanddeeplearning.com/ .. _Theano tutorial: http://deeplearning.net/software/theano/tutorial/ .. _Graph Structures: http://deeplearning.net/software/theano/extending/graphstructures.html .. _mnist.py: https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py .. [Hinton2012] Improving neural networks by preventing co-adaptation of feature detectors. http://arxiv.org/abs/1207.0580 .. _Pylearn2: http://deeplearning.net/software/pylearn2/ .. _cuda-convnet: https://code.google.com/p/cuda-convnet/ .. _cuDNN: https://developer.nvidia.com/cudnn .. _Theano convolution documentation: http://deeplearning.net/software/theano/library/tensor/nnet/conv.html Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/user/layers.rst0000644000175000017500000002006713307306052025023 0ustar sinclairssinclairsLayers ====== The `lasagne.layers` module provides various classes representing the layers of a neural network. All of them are subclasses of the :class:`lasagne.layers.Layer` base class. Creating a layer ---------------- A layer can be created as an instance of a `Layer` subclass. For example, a dense layer can be created as follows: >>> import lasagne >>> l = lasagne.layers.DenseLayer(l_in, num_units=100) # doctest: +SKIP This will create a dense layer with 100 units, connected to another layer `l_in`. Creating a network ------------------ Note that for almost all types of layers, you will have to specify one or more other layers that the layer you are creating gets its input from. 
The main exception is :class:`InputLayer`, which can be used to represent the input of a network. Chaining layer instances together like this will allow you to specify your desired network structure. Note that the same layer can be used as input to multiple other layers, allowing for arbitrary tree and directed acyclic graph (DAG) structures. Here is an example of an MLP with a single hidden layer: >>> import theano.tensor as T >>> l_in = lasagne.layers.InputLayer((100, 50)) >>> l_hidden = lasagne.layers.DenseLayer(l_in, num_units=200) >>> l_out = lasagne.layers.DenseLayer(l_hidden, num_units=10, ... nonlinearity=T.nnet.softmax) The first layer of the network is an `InputLayer`, which represents the input. When creating an input layer, you should specify the shape of the input data. In this example, the input is a matrix with shape (100, 50), representing a batch of 100 data points, where each data point is a vector of length 50. The first dimension of a tensor is usually the batch dimension, following the established Theano and scikit-learn conventions. The hidden layer of the network is a dense layer with 200 units, taking its input from the input layer. Note that we did not specify the nonlinearity of the hidden layer. A layer with rectified linear units will be created by default. The output layer of the network is a dense layer with 10 units and a softmax nonlinearity, allowing for 10-way classification of the input vectors. Note also that we did not create any object representing the entire network. Instead, the output layer instance `l_out` is also used to refer to the entire network in Lasagne. Naming layers ------------- For convenience, you can name a layer by specifying the `name` keyword argument: >>> l_hidden = lasagne.layers.DenseLayer(l_in, num_units=200, ... name="hidden_layer") Initializing parameters ----------------------- Many types of layers, such as :class:`DenseLayer`, have trainable parameters. These are referred to by short names that match the conventions used in modern deep learning literature. For example, a weight matrix will usually be called `W`, and a bias vector will usually be `b`. When creating a layer with trainable parameters, Theano shared variables will be created for them and initialized automatically. You can optionally specify your own initialization strategy by using keyword arguments that match the parameter variable names. For example: >>> l = lasagne.layers.DenseLayer(l_in, num_units=100, ... W=lasagne.init.Normal(0.01)) The weight matrix `W` of this dense layer will be initialized using samples from a normal distribution with standard deviation 0.01 (see `lasagne.init` for more information). There are several ways to manually initialize parameters: - Theano shared variable If a shared variable instance is provided, this is used unchanged as the parameter variable. For example: >>> import theano >>> import numpy as np >>> W = theano.shared(np.random.normal(0, 0.01, (50, 100))) >>> l = lasagne.layers.DenseLayer(l_in, num_units=100, W=W) - numpy array If a numpy array is provided, a shared variable is created and initialized using the array. For example: >>> W_init = np.random.normal(0, 0.01, (50, 100)) >>> l = lasagne.layers.DenseLayer(l_in, num_units=100, W=W_init) - callable If a callable is provided (e.g. a function or a :class:`lasagne.init.Initializer` instance), a shared variable is created and the callable is called with the desired shape to generate suitable initial parameter values. The variable is then initialized with those values. 
For example: >>> l = lasagne.layers.DenseLayer(l_in, num_units=100, ... W=lasagne.init.Normal(0.01)) Or, using a custom initialization function: >>> def init_W(shape): ... return np.random.normal(0, 0.01, shape) >>> l = lasagne.layers.DenseLayer(l_in, num_units=100, W=init_W) Some types of parameter variables can also be set to ``None`` at initialization (e.g. biases). In that case, the parameter variable will be omitted. For example, creating a dense layer without biases is done as follows: >>> l = lasagne.layers.DenseLayer(l_in, num_units=100, b=None) Parameter sharing ----------------- Parameter sharing between multiple layers can be achieved by using the same Theano shared variable instance for their parameters. For example: >>> l1 = lasagne.layers.DenseLayer(l_in, num_units=100) >>> l2 = lasagne.layers.DenseLayer(l_in, num_units=100, W=l1.W) These two layers will now share weights (but have separate biases). Propagating data through layers ------------------------------- To compute an expression for the output of a single layer given its input, the `get_output_for()` method can be used. To compute the output of a network, you should instead call :func:`lasagne.layers.get_output()` on it. This will traverse the network graph. You can call this function with the layer you want to compute the output expression for: >>> y = lasagne.layers.get_output(l_out) In that case, a Theano expression will be returned that represents the output in function of the input variables associated with the :class:`lasagne.layers.InputLayer` instance (or instances) in the network, so given the example network from before, you could compile a Theano function to compute its output given an input as follows: >>> f = theano.function([l_in.input_var], lasagne.layers.get_output(l_out)) You can also specify a Theano expression to use as input as a second argument to :func:`lasagne.layers.get_output()`: >>> x = T.matrix('x') >>> y = lasagne.layers.get_output(l_out, x) >>> f = theano.function([x], y) This only works when there is only a single :class:`InputLayer` in the network. If there is more than one, you can specify input expressions in a dictionary. For example, in a network with two input layers `l_in1` and `l_in2` and an output layer `l_out`: >>> x1 = T.matrix('x1') >>> x2 = T.matrix('x2') >>> y = lasagne.layers.get_output(l_out, { l_in1: x1, l_in2: x2 }) Any keyword arguments passed to `get_output()` are propagated to all layers. This makes it possible to control the behavior of the entire network. The main use case for this is the ``deterministic`` keyword argument, which disables stochastic behaviour such as dropout when set to ``True``. This is useful because a deterministic output is desirable at evaluation time. >>> y = lasagne.layers.get_output(l_out, deterministic=True) Some networks may have multiple output layers - or you may just want to compute output expressions for intermediate layers in the network. In that case, you can pass a list of layers. For example, in a network with two output layers `l_out1` and `l_out2`: >>> y1, y2 = lasagne.layers.get_output([l_out1, l_out2]) You could also just call :func:`lasagne.layers.get_output()` twice: >>> y1 = lasagne.layers.get_output(l_out1) >>> y2 = lasagne.layers.get_output(l_out2) However, this is **not recommended**! Some network layers may have non-deterministic output, such as dropout layers. If you compute the network output expressions with separate calls to :func:`lasagne.layers.get_output()`, they will not use the same samples. 
Furthermore, this may lead to unnecessary computation because Theano is not always able to merge identical computations properly. Calling `get_output()` only once prevents both of these issues.Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/user/custom_layers.rst0000644000175000017500000001456213307306052026420 0ustar sinclairssinclairsCreating custom layers ====================== A simple layer -------------- To implement a custom layer in Lasagne, you will have to write a Python class that subclasses :class:`Layer` and implement at least one method: `get_output_for()`. This method computes the output of the layer given its input. Note that both the output and the input are Theano expressions, so they are symbolic. The following is an example implementation of a layer that multiplies its input by 2: .. code:: python class DoubleLayer(lasagne.layers.Layer): def get_output_for(self, input, **kwargs): return 2 * input This is all that's required to implement a functioning custom layer class in Lasagne. A layer that changes the shape ------------------------------ If the layer does not change the shape of the data (for example because it applies an elementwise operation), then implementing only this one method is sufficient. Lasagne will assume that the output of the layer has the same shape as its input. However, if the operation performed by the layer changes the shape of the data, you also need to implement `get_output_shape_for()`. This method computes the shape of the layer output given the shape of its input. Note that this shape computation should result in a tuple of integers, so it is *not* symbolic. This method exists because Lasagne needs a way to propagate shape information when a network is defined, so it can determine what sizes the parameter tensors should be, for example. This mechanism allows each layer to obtain the size of its input from the previous layer, which means you don't have to specify the input size manually. This also prevents errors stemming from inconsistencies between the layers' expected and actual shapes. We can implement a layer that computes the sum across the trailing axis of its input as follows: .. code:: python class SumLayer(lasagne.layers.Layer): def get_output_for(self, input, **kwargs): return input.sum(axis=-1) def get_output_shape_for(self, input_shape): return input_shape[:-1] It is important that the shape computation is correct, as this shape information may be used to initialize other layers in the network. A layer with parameters ----------------------- If the layer has parameters, these should be initialized in the constructor. In Lasagne, parameters are represented by Theano shared variables. A method is provided to create and register parameter variables: :meth:`lasagne.layers.Layer.add_param()`. To show how this can be used, here is a layer that multiplies its input by a matrix ``W`` (much like a typical fully connected layer in a neural network would). This matrix is a parameter of the layer. The shape of the matrix will be ``(num_inputs, num_units)``, where ``num_inputs`` is the number of input features and ``num_units`` has to be specified when the layer is created. .. 
code:: python class DotLayer(lasagne.layers.Layer): def __init__(self, incoming, num_units, W=lasagne.init.Normal(0.01), **kwargs): super(DotLayer, self).__init__(incoming, **kwargs) num_inputs = self.input_shape[1] self.num_units = num_units self.W = self.add_param(W, (num_inputs, num_units), name='W') def get_output_for(self, input, **kwargs): return T.dot(input, self.W) def get_output_shape_for(self, input_shape): return (input_shape[0], self.num_units) A few things are worth noting here: when overriding the constructor, we need to call the superclass constructor on the first line. This is important to ensure the layer functions properly. Note that we pass ``**kwargs`` - although this is not strictly necessary, it enables some other cool Lasagne features, such as making it possible to give the layer a name: >>> l_dot = DotLayer(l_in, num_units=50, name='my_dot_layer') The call to ``self.add_param()`` creates the Theano shared variable representing the parameter, and registers it so it can later be retrieved using :meth:`lasagne.layers.Layer.get_params()`. It returns the created variable, which we tuck away in ``self.W`` for easy access. Note that we've also made it possible to specify a custom initialization strategy for ``W`` by adding a constructor argument for it, e.g.: >>> l_dot = DotLayer(l_in, num_units=50, W=lasagne.init.Constant(0.0)) This 'Lasagne idiom' of tucking away a created parameter variable in an attribute for easy access and adding a constructor argument with the same name to specify the initialization strategy is very common throughout the library. Finally, note that we used ``self.input_shape`` to determine the shape of the parameter matrix. This property is available in all Lasagne layers, once the superclass constructor has been called. A layer with multiple behaviors ------------------------------- Some layers can have multiple behaviors. For example, a layer implementing dropout should be able to be switched on or off. During training, we want it to apply dropout noise to its input and scale up the remaining values, but during evaluation we don't want it to do anything. For this purpose, the `get_output_for()` method takes optional keyword arguments (``kwargs``). When `get_output()` is called to compute an expression for the output of a network, all specified keyword arguments are passed to the `get_output_for()` methods of all layers in the network. For layers that add noise for regularization purposes, such as dropout, the convention in Lasagne is to use the keyword argument ``deterministic`` to control its behavior. Lasagne's :class:`lasagne.layers.DropoutLayer` looks roughly like this (simplified implementation for illustration purposes): .. 
code:: python from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams _srng = RandomStreams() class DropoutLayer(Layer): def __init__(self, incoming, p=0.5, **kwargs): super(DropoutLayer, self).__init__(incoming, **kwargs) self.p = p def get_output_for(self, input, deterministic=False, **kwargs): if deterministic: # do nothing in the deterministic case return input else: # add dropout noise otherwise retain_prob = 1 - self.p input /= retain_prob return input * _srng.binomial(input.shape, p=retain_prob, dtype=theano.config.floatX) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/_static/0000755000175000017500000000000013307306052023435 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/_static/fix_rtd.css0000644000175000017500000000021313307306052025602 0ustar sinclairssinclairs/* work around https://github.com/snide/sphinx_rtd_theme/issues/149 */ .rst-content table.field-list .field-body { padding-top: 8px; } Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/index.rst0000644000175000017500000000200213307306052023642 0ustar sinclairssinclairsWelcome to Lasagne ================== Lasagne is a lightweight library to build and train neural networks in Theano. Lasagne is a work in progress, input is welcome. The available documentation is limited for now. The project is on `GitHub`_. User Guide ------------ The Lasagne user guide explains how to install Lasagne, how to build and train neural networks using Lasagne, and how to contribute to the library as a developer. .. toctree:: :maxdepth: 2 user/installation user/tutorial user/layers user/custom_layers user/development API Reference ------------- If you are looking for information on a specific function, class or method, this part of the documentation is for you. .. toctree:: :maxdepth: 2 modules/layers modules/updates modules/init modules/nonlinearities modules/objectives modules/regularization modules/random modules/utils Indices and tables ------------------ * :ref:`genindex` * :ref:`modindex` * :ref:`search` .. _GitHub: https://github.com/Lasagne/LasagneLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/conf.py0000644000175000017500000002745613307306052023324 0ustar sinclairssinclairs# -*- coding: utf-8 -*- # # Lasagne documentation build configuration file, created by # sphinx-quickstart on Sat Nov 8 11:00:12 2014. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. # import sys import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # sys.path.insert(0, os.path.abspath('.')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.doctest', 'sphinx.ext.mathjax', # 'sphinx.ext.viewcode', # create HTML file of source code and link to it 'sphinx.ext.linkcode', # link to github, see linkcode_resolve() below 'numpydoc', # 'sphinx.ext.napoleon', # alternative to numpydoc -- looks a bit worse. ] # See https://github.com/rtfd/readthedocs.org/issues/283 mathjax_path = ('https://cdn.mathjax.org/mathjax/latest/MathJax.js?' 'config=TeX-AMS-MML_HTMLorMML') # see http://stackoverflow.com/q/12206334/562769 numpydoc_show_class_members = False # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = u'Lasagne' copyright = u'2014–2015, Lasagne contributors' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # import lasagne # The short X.Y version. version = '.'.join(lasagne.__version__.split('.', 2)[:2]) # The full version, including alpha/beta/rc tags. release = lasagne.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all # documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. # keep_warnings = False # Resolve function for the linkcode extension. 
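# Given the module and fullname of a documented object, locate its source file
# and line span via the inspect module, then return a GitHub URL (using the
# 'master' tag for dev builds, 'v<release>' otherwise) so that the generated
# "[source]" links jump straight to the corresponding lines on GitHub.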
def linkcode_resolve(domain, info):
    def find_source():
        # try to find the file and line number, based on code from numpy:
        # https://github.com/numpy/numpy/blob/master/doc/source/conf.py#L286
        obj = sys.modules[info['module']]
        for part in info['fullname'].split('.'):
            obj = getattr(obj, part)
        import inspect
        import os
        fn = inspect.getsourcefile(obj)
        fn = os.path.relpath(fn, start=os.path.dirname(lasagne.__file__))
        source, lineno = inspect.getsourcelines(obj)
        return fn, lineno, lineno + len(source) - 1

    if domain != 'py' or not info['module']:
        return None
    try:
        filename = 'lasagne/%s#L%d-L%d' % find_source()
    except Exception:
        filename = info['module'].replace('.', '/') + '.py'
    tag = 'master' if 'dev' in release else ('v' + release)
    return "https://github.com/Lasagne/Lasagne/blob/%s/%s" % (tag, filename)

# -- Options for HTML output ----------------------------------------------

## Classic Python style:
#html_theme = 'classic'
#html_theme_options = {
#    'stickysidebar': True,
#}

## Read the docs style:
if os.environ.get('READTHEDOCS') != 'True':
    try:
        import sphinx_rtd_theme
    except ImportError:
        pass  # assume we have sphinx >= 1.3
    else:
        html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
        html_theme = 'sphinx_rtd_theme'


def setup(app):
    app.add_stylesheet("fix_rtd.css")

## Bootstrap style:
#import sphinx_bootstrap_theme
#html_theme = 'bootstrap'
#html_theme_options = {
#    'bootswatch_theme': 'cosmo',  # see http://bootswatch.com/ for more
#    'bootstrap_version': '3',
#    'navbar_title': 'Lasagne',
#    'source_link_position': 'footer',
#}
#html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
# html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
# html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
# html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
# html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
# html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}

# If false, no module index is generated.
# html_domain_indices = True

# If false, no index is generated.
# html_use_index = True

# If true, the index is split into individual pages for each letter.
# html_split_index = False

# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'lasagnedoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    # 'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    ('index', 'lasagne.tex', u'lasagne Documentation',
     u'Lasagne contributors', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False

# If true, show page references after internal links.
# latex_show_pagerefs = False

# If true, show URL addresses after external links.
# latex_show_urls = False

# Documents to append as an appendix to all manuals.
# latex_appendices = []

# If false, no module index is generated.
# latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'lasagne', u'Lasagne Documentation',
     [u'Lasagne contributors'], 1)
]

# If true, show URL addresses after external links.
# man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    ('index', 'lasagne', u'Lasagne Documentation',
     u'Lasagne contributors', 'Lasagne', 'One line description of project.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False # fool rtd into thinking a GPU is available, so all modules are importable try: from unittest.mock import Mock except ImportError: from mock import Mock import theano import sys theano.config = Mock(device='gpu') theano.sandbox = Mock() sys.modules['theano.sandbox'] = theano.sandbox sys.modules['theano.sandbox.cuda'] = theano.sandbox.cuda sys.modules['theano.sandbox.cuda.dnn'] = theano.sandbox.cuda.dnn sys.modules['theano.sandbox.cuda.basic_ops'] = theano.sandbox.cuda.basic_ops theano.sandbox.cuda.cuda_enabled = True theano.sandbox.cuda.dnn.dnn_available = lambda: True theano.gpuarray = Mock() sys.modules['theano.gpuarray'] = theano.gpuarray sys.modules['theano.gpuarray.dnn'] = theano.gpuarray.dnn theano.gpuarray.pygpu_activated = True theano.gpuarray.dnn.dnn_present = lambda: True sys.modules['pylearn2'] = Mock() sys.modules['pylearn2.sandbox'] = Mock() sys.modules['pylearn2.sandbox.cuda_convnet'] = Mock() sys.modules['pylearn2.sandbox.cuda_convnet.filter_acts'] = \ Mock(FilterActs=None) # fool rtd into thinking it has a recent enough Theano version to support # all optional features that otherwise require a bleeding-edge Theano try: reload except NameError: try: from importlib import reload except ImportError: from imp import reload if not hasattr(theano.tensor.nnet, 'conv3d'): theano.tensor.nnet.conv3d = Mock() reload(lasagne.layers.conv) if not hasattr(theano.tensor.nnet.abstract_conv, 'AbstractConv3d_gradInputs'): theano.tensor.nnet.abstract_conv.AbstractConv3d_gradInputs = Mock() reload(lasagne.layers.conv) if not hasattr(theano.tensor.signal.pool, 'pool_3d'): theano.tensor.signal.pool.pool_3d = Mock() reload(lasagne.layers.pool) reload(lasagne.layers) Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/0000755000175000017500000000000013307306052023457 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/nonlinearities.rst0000644000175000017500000000137313307306052027240 0ustar sinclairssinclairs:mod:`lasagne.nonlinearities` ============================= .. automodule:: lasagne.nonlinearities .. autosummary:: sigmoid softmax tanh ScaledTanH rectify LeakyRectify leaky_rectify very_leaky_rectify elu SELU selu softplus linear identity Detailed description -------------------- .. autofunction:: sigmoid .. autofunction:: softmax .. autofunction:: tanh .. autoclass:: ScaledTanH :members: .. autoclass:: ScaledTanh .. autofunction:: rectify .. autoclass:: LeakyRectify :members: .. autofunction:: leaky_rectify .. autofunction:: very_leaky_rectify .. autofunction:: elu .. autoclass:: SELU :members: .. autofunction:: selu .. autofunction:: softplus .. autofunction:: linear .. autofunction:: identity Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/objectives.rst0000644000175000017500000000102413307306052026343 0ustar sinclairssinclairs:mod:`lasagne.objectives` ========================= .. automodule:: lasagne.objectives Loss functions -------------- .. autofunction:: binary_crossentropy .. autofunction:: categorical_crossentropy .. autofunction:: squared_error .. autofunction:: binary_hinge_loss .. autofunction:: multiclass_hinge_loss .. autofunction:: huber_loss Aggregation functions --------------------- .. autofunction:: aggregate Evaluation functions -------------------- .. autofunction:: binary_accuracy .. 
autofunction:: categorical_accuracy Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/updates.rst0000644000175000017500000000113313307306052025654 0ustar sinclairssinclairs:mod:`lasagne.updates` ====================== .. automodule:: lasagne.updates Update functions ---------------- .. autofunction:: sgd .. autofunction:: momentum .. autofunction:: nesterov_momentum .. autofunction:: adagrad .. autofunction:: rmsprop .. autofunction:: adadelta .. autofunction:: adam .. autofunction:: adamax .. autofunction:: amsgrad Update modification functions ----------------------------- .. autofunction:: apply_momentum .. autofunction:: apply_nesterov_momentum Helper functions ---------------- .. autofunction:: norm_constraint .. autofunction:: total_norm_constraint Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/random.rst0000644000175000017500000000020113307306052025462 0ustar sinclairssinclairs:mod:`lasagne.random` ===================== .. automodule:: lasagne.random .. autofunction:: get_rng .. autofunction:: set_rng Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers.rst0000644000175000017500000000671613307306052025522 0ustar sinclairssinclairs:mod:`lasagne.layers` ===================== .. automodule:: lasagne.layers .. toctree:: :hidden: layers/helper layers/base layers/input layers/dense layers/conv layers/local layers/pool layers/recurrent layers/noise layers/shape layers/merge layers/normalization layers/embedding layers/special layers/corrmm layers/cuda_convnet layers/dnn .. rubric:: :doc:`layers/helper` .. autosummary:: :nosignatures: get_output get_output_shape get_all_layers get_all_params count_params get_all_param_values set_all_param_values .. rubric:: :doc:`layers/base` .. autosummary:: :nosignatures: Layer MergeLayer .. rubric:: :doc:`layers/input` .. autosummary:: :nosignatures: InputLayer .. rubric:: :doc:`layers/dense` .. autosummary:: :nosignatures: DenseLayer NINLayer .. rubric:: :doc:`layers/conv` .. autosummary:: :nosignatures: Conv1DLayer Conv2DLayer Conv3DLayer TransposedConv2DLayer Deconv2DLayer DilatedConv2DLayer .. rubric:: :doc:`layers/local` .. autosummary:: :nosignatures: LocallyConnected2DLayer .. rubric:: :doc:`layers/pool` .. autosummary:: :nosignatures: MaxPool1DLayer MaxPool2DLayer MaxPool3DLayer Pool1DLayer Pool2DLayer Pool3DLayer Upscale1DLayer Upscale2DLayer Upscale3DLayer GlobalPoolLayer FeaturePoolLayer FeatureWTALayer SpatialPyramidPoolingLayer .. rubric:: :doc:`layers/recurrent` .. autosummary:: :nosignatures: CustomRecurrentLayer RecurrentLayer LSTMLayer GRULayer Gate .. rubric:: :doc:`layers/noise` .. autosummary:: :nosignatures: DropoutLayer dropout dropout_channels spatial_dropout dropout_locations GaussianNoiseLayer .. rubric:: :doc:`layers/shape` .. autosummary:: :nosignatures: ReshapeLayer reshape FlattenLayer flatten DimshuffleLayer dimshuffle PadLayer pad SliceLayer .. rubric:: :doc:`layers/merge` .. autosummary:: :nosignatures: ConcatLayer concat ElemwiseMergeLayer ElemwiseSumLayer .. rubric:: :doc:`layers/normalization` .. autosummary:: :nosignatures: LocalResponseNormalization2DLayer BatchNormLayer batch_norm StandardizationLayer instance_norm layer_norm .. rubric:: :doc:`layers/embedding` .. autosummary:: :nosignatures: EmbeddingLayer .. rubric:: :doc:`layers/special` .. autosummary:: :nosignatures: NonlinearityLayer BiasLayer ScaleLayer standardize ExpressionLayer InverseLayer TransformerLayer TPSTransformerLayer ParametricRectifierLayer prelu RandomizedRectifierLayer rrelu .. 
rubric:: :doc:`layers/corrmm` .. autosummary:: :nosignatures: corrmm.Conv2DMMLayer .. rubric:: :doc:`layers/cuda_convnet` .. autosummary:: :nosignatures: cuda_convnet.Conv2DCCLayer cuda_convnet.MaxPool2DCCLayer cuda_convnet.ShuffleBC01ToC01BLayer cuda_convnet.bc01_to_c01b cuda_convnet.ShuffleC01BToBC01Layer cuda_convnet.c01b_to_bc01 cuda_convnet.NINLayer_c01b .. rubric:: :doc:`layers/dnn` .. autosummary:: :nosignatures: dnn.Conv2DDNNLayer dnn.Conv3DDNNLayer dnn.MaxPool2DDNNLayer dnn.Pool2DDNNLayer dnn.MaxPool3DDNNLayer dnn.Pool3DDNNLayer dnn.SpatialPyramidPoolingDNNLayer dnn.BatchNormDNNLayer dnn.batch_norm_dnn Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/utils.rst0000644000175000017500000000061213307306052025350 0ustar sinclairssinclairs:mod:`lasagne.utils` ==================== .. automodule:: lasagne.utils .. autodata:: int_types :annotation: = (numbers.Integral, np.integer) .. autofunction:: floatX .. autofunction:: shared_empty .. autofunction:: as_theano_expression .. autofunction:: collect_shared_vars .. autofunction:: one_hot .. autofunction:: unique .. autofunction:: compute_norms .. autofunction:: create_param Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/regularization.rst0000644000175000017500000000060313307306052027247 0ustar sinclairssinclairs:mod:`lasagne.regularization` ============================= .. automodule:: lasagne.regularization Helper functions ---------------- .. autofunction:: apply_penalty .. autofunction:: regularize_layer_params .. autofunction:: regularize_layer_params_weighted .. autofunction:: regularize_network_params Penalty functions ----------------- .. autofunction:: l1 .. autofunction:: l2 Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/0000755000175000017500000000000013307306052024756 5ustar sinclairssinclairsLasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/conv.rst0000644000175000017500000000240713307306052026460 0ustar sinclairssinclairsConvolutional layers -------------------- .. automodule:: lasagne.layers.conv .. currentmodule:: lasagne.layers .. autoclass:: Conv1DLayer :members: .. autoclass:: Conv2DLayer :members: .. note:: For experts: ``Conv2DLayer`` will create a convolutional layer using ``T.nnet.conv2d``, Theano's default convolution. On compilation for GPU, Theano replaces this with a `cuDNN`_-based implementation if available, otherwise falls back to a gemm-based implementation. For details on this, please see the `Theano convolution documentation`_. Lasagne also provides convolutional layers directly enforcing a specific implementation: :class:`lasagne.layers.dnn.Conv2DDNNLayer` to enforce cuDNN, :class:`lasagne.layers.corrmm.Conv2DMMLayer` to enforce the gemm-based one, :class:`lasagne.layers.cuda_convnet.Conv2DCCLayer` for Krizhevsky's `cuda-convnet`_. .. autoclass:: Conv3DLayer :members: .. _cuda-convnet: https://code.google.com/p/cuda-convnet/ .. _cuDNN: https://developer.nvidia.com/cudnn .. _Theano convolution documentation: http://deeplearning.net/software/theano/library/tensor/nnet/conv.html .. autoclass:: TransposedConv2DLayer :members: .. autoclass:: Deconv2DLayer .. autoclass:: DilatedConv2DLayer :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/cuda_convnet.rst0000644000175000017500000000062313307306052030161 0ustar sinclairssinclairs:mod:`lasagne.layers.cuda_convnet` ---------------------------------- This module houses layers that require `pylearn2 ` to work. 
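For instance, a minimal sketch of building with one of these layers (the shapes
and variable names below are only illustrative, and a working GPU setup with
pylearn2's cuda-convnet wrappers is assumed)::

    import lasagne
    import lasagne.layers.cuda_convnet  # must be imported explicitly

    l_in = lasagne.layers.InputLayer((128, 3, 32, 32))
    l_conv = lasagne.layers.cuda_convnet.Conv2DCCLayer(
        l_in, num_filters=32, filter_size=(3, 3))
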
Its layers are not automatically imported into the :mod:`lasagne.layers` namespace: To use these layers, you need to ``import lasagne.layers.cuda_convnet`` explicitly. .. automodule:: lasagne.layers.cuda_convnet :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/corrmm.rst0000644000175000017500000000051113307306052027004 0ustar sinclairssinclairs:mod:`lasagne.layers.corrmm` ---------------------------- This module houses layers that require a GPU to work. Its layers are not automatically imported into the :mod:`lasagne.layers` namespace: To use these layers, you need to ``import lasagne.layers.corrmm`` explicitly. .. automodule:: lasagne.layers.corrmm :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/pool.rst0000644000175000017500000000126613307306052026466 0ustar sinclairssinclairsPooling layers -------------- .. automodule:: lasagne.layers.pool .. currentmodule:: lasagne.layers .. autoclass:: MaxPool1DLayer :members: .. autoclass:: MaxPool2DLayer :members: .. autoclass:: MaxPool3DLayer :members: .. autoclass:: Pool1DLayer :members: .. autoclass:: Pool2DLayer :members: .. autoclass:: Pool3DLayer :members: .. autoclass:: Upscale1DLayer :members: .. autoclass:: Upscale2DLayer :members: .. autoclass:: Upscale3DLayer :members: .. autoclass:: GlobalPoolLayer :members: .. autoclass:: FeaturePoolLayer :members: .. autoclass:: FeatureWTALayer :members: .. autoclass:: SpatialPyramidPoolingLayer :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/merge.rst0000644000175000017500000000040513307306052026606 0ustar sinclairssinclairsMerge layers ------------ .. automodule:: lasagne.layers.merge .. currentmodule:: lasagne.layers .. autoclass:: ConcatLayer :members: .. autoclass:: concat .. autoclass:: ElemwiseMergeLayer :members: .. autoclass:: ElemwiseSumLayer :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/base.rst0000644000175000017500000000027313307306052026424 0ustar sinclairssinclairsLayer base classes ------------------ .. automodule:: lasagne.layers.base .. currentmodule:: lasagne.layers .. autoclass:: Layer :members: .. autoclass:: MergeLayer :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/dense.rst0000644000175000017500000000026313307306052026607 0ustar sinclairssinclairsDense layers ------------ .. automodule:: lasagne.layers.dense .. currentmodule:: lasagne.layers .. autoclass:: DenseLayer :members: .. autoclass:: NINLayer :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/local.rst0000644000175000017500000000023113307306052026576 0ustar sinclairssinclairsLocal layers ------------ .. automodule:: lasagne.layers.local .. currentmodule:: lasagne.layers .. autoclass:: LocallyConnected2DLayer :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/special.rst0000644000175000017500000000116013307306052027126 0ustar sinclairssinclairsSpecial-purpose layers ---------------------- .. automodule:: lasagne.layers.special .. currentmodule:: lasagne.layers .. autoclass:: NonlinearityLayer :members: .. autoclass:: BiasLayer :members: .. autoclass:: ScaleLayer :members: .. autofunction:: standardize .. autoclass:: ExpressionLayer :members: .. autoclass:: InverseLayer :members: .. autoclass:: TransformerLayer :members: .. autoclass:: TPSTransformerLayer :members: .. autoclass:: ParametricRectifierLayer :members: .. autofunction:: prelu .. autoclass:: RandomizedRectifierLayer :members: .. 
autofunction:: rrelu Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/input.rst0000644000175000017500000000021613307306052026646 0ustar sinclairssinclairsNetwork input ------------- .. automodule:: lasagne.layers.input .. currentmodule:: lasagne.layers .. autoclass:: InputLayer :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/noise.rst0000644000175000017500000000060113307306052026622 0ustar sinclairssinclairsNoise layers ------------ .. automodule:: lasagne.layers.noise .. currentmodule:: lasagne.layers .. autoclass:: DropoutLayer :members: .. autoclass:: dropout .. autofunction:: dropout_channels .. function:: spatial_dropout(incoming, *args, **kwargs) alias of :func:`dropout_channels` .. autofunction:: dropout_locations .. autoclass:: GaussianNoiseLayer :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/embedding.rst0000644000175000017500000000023513307306052027426 0ustar sinclairssinclairsEmbedding layers ---------------- .. automodule:: lasagne.layers.embedding .. currentmodule:: lasagne.layers .. autoclass:: EmbeddingLayer :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/normalization.rst0000644000175000017500000000057113307306052030401 0ustar sinclairssinclairsNormalization layers -------------------- .. automodule:: lasagne.layers.normalization .. currentmodule:: lasagne.layers .. autoclass:: LocalResponseNormalization2DLayer :members: .. autoclass:: BatchNormLayer :members: .. autofunction:: batch_norm .. autoclass:: StandardizationLayer :members: .. autofunction:: instance_norm .. autofunction:: layer_norm Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/shape.rst0000644000175000017500000000061113307306052026606 0ustar sinclairssinclairsShape layers ------------ .. automodule:: lasagne.layers.shape .. currentmodule:: lasagne.layers .. autoclass:: ReshapeLayer :members: .. autoclass:: reshape .. autoclass:: FlattenLayer :members: .. autoclass:: flatten .. autoclass:: DimshuffleLayer :members: .. autoclass:: dimshuffle .. autoclass:: PadLayer :members: .. autoclass:: pad .. autoclass:: SliceLayer Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/helper.rst0000644000175000017500000000053513307306052026772 0ustar sinclairssinclairsHelper functions ---------------- .. automodule:: lasagne.layers.helper .. currentmodule:: lasagne.layers .. autofunction:: get_output .. autofunction:: get_output_shape .. autofunction:: get_all_layers .. autofunction:: get_all_params .. autofunction:: count_params .. autofunction:: get_all_param_values .. autofunction:: set_all_param_values Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/recurrent.rst0000644000175000017500000000050213307306052027516 0ustar sinclairssinclairsRecurrent layers ---------------- .. automodule:: lasagne.layers.recurrent .. currentmodule:: lasagne.layers .. autoclass:: CustomRecurrentLayer :members: .. autoclass:: RecurrentLayer :members: .. autoclass:: LSTMLayer :members: .. autoclass:: GRULayer :members: .. autoclass:: Gate :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/layers/dnn.rst0000644000175000017500000000115313307306052026267 0ustar sinclairssinclairs:mod:`lasagne.layers.dnn` ------------------------- This module houses layers that require `cuDNN `_ to work. Its layers are not automatically imported into the :mod:`lasagne.layers` namespace: To use these layers, you need to ``import lasagne.layers.dnn`` explicitly. 
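For example, a minimal sketch (the shapes and variable names here are only
illustrative; a CUDA-capable GPU with cuDNN and a matching Theano setup is
assumed)::

    import lasagne
    import lasagne.layers.dnn  # not pulled in by "import lasagne" alone

    l_in = lasagne.layers.InputLayer((None, 3, 64, 64))
    l_conv = lasagne.layers.dnn.Conv2DDNNLayer(
        l_in, num_filters=16, filter_size=(3, 3))
    l_pool = lasagne.layers.dnn.MaxPool2DDNNLayer(l_conv, pool_size=(2, 2))
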
Note that these layers are not required to use cuDNN: If cuDNN is available, Theano will use it for the default convolution and pooling layers anyway. However, they allow you to enforce the usage of cuDNN or use features not available in :mod:`lasagne.layers`. .. automodule:: lasagne.layers.dnn :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/docs/modules/init.rst0000644000175000017500000000135413307306052025157 0ustar sinclairssinclairs:mod:`lasagne.init` =================== .. automodule:: lasagne.init Initializers ------------ .. autosummary:: Constant Normal Uniform Glorot GlorotNormal GlorotUniform He HeNormal HeUniform Orthogonal Sparse Detailed description -------------------- .. autoclass:: Initializer :members: .. autoclass:: Constant :members: .. autoclass:: Normal :members: .. autoclass:: Uniform :members: .. autoclass:: Glorot :members: .. autoclass:: GlorotNormal :members: .. autoclass:: GlorotUniform :members: .. autoclass:: He :members: .. autoclass:: HeNormal :members: .. autoclass:: HeUniform :members: .. autoclass:: Orthogonal :members: .. autoclass:: Sparse :members: Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/.coveragerc-nogpu0000644000175000017500000000017313307306052024327 0ustar sinclairssinclairs[run] omit = lasagne/tests/* lasagne/layers/corrmm.py lasagne/layers/cuda_convnet.py lasagne/layers/dnn.py Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/LICENSE0000644000175000017500000000264713307306052022075 0ustar sinclairssinclairsThe MIT License (MIT) Copyright (c) 2014-2015 Lasagne contributors Lasagne uses a shared copyright model: each contributor holds copyright over their contributions to Lasagne. The project versioning records all such contribution and copyright details. By contributing to the Lasagne repository through pull-request, comment, or otherwise, the contributor releases their content to the license and copyright terms herein. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/requirements.txt0000644000175000017500000000001613307306052024340 0ustar sinclairssinclairsTheano==0.8.2 Lasagne-a61b76fd991f84c50acdb7bea02118899b5fefe1/requirements-dev.txt0000644000175000017500000000017113307306052025116 0ustar sinclairssinclairs-r requirements.txt mock numpydoc pep8==1.6.2 pytest pytest-cov pytest-pep8 Jinja2==2.7.3 Sphinx==1.2.3 sphinx_rtd_theme