/**
 * @file: Mlp.hpp
 *
 * Interface to the MLP computations. Cares not about multi- or
 * single-objectivity, or anything else regarding the learning
 * algorithm; it just provides objective function values and
 * derivatives for the differentiable ones.
 *
 * Implements a simple ASCII-text serialization that can be easily
 * parsed or written in Matlab or any other analysis tool with minimal
 * string manipulations or conversions.
 *
 * All computations require an external workspace of doubles. This
 * inconvenience provides for a couple of optimizations:
 *
 * (1) The forward pass needs to be done only once even if the error
 *     and backpropagation phases are done for different formulations.
 *     A copy of the workspace must be made before error and gradient
 *     evaluation, but the costly sums and sigmoids need not be
 *     evaluated twice.
 *
 * (2) A single per-thread storage can be reused for sequentially
 *     evaluating a population of MLPs when the maximum required
 *     space is known beforehand.
 */

#ifndef MLP_HPP_
#define MLP_HPP_

#include "SynapticRandomizer.hpp"

#include <vector>
#include <iostream>
#include <ostream>
#include <istream>
#include <string>

using std::vector;
using std::cout;
using std::ostream;
using std::istream;
using std::string;

namespace jymlp{

  /** As of now, define activation functions as an enum.
      FIXME: hyptan actually is tansig */
  namespace actf{
    enum ActF : int {Unset = 0, linear = 1, hyptan = 2, logsig = 3};
  }

  /**
   * The error function type. q2a2 corresponds to computing mean squared
   * error (MSE), q2a1 to computing mean Euclidean vector norm error
   * (MEE), and q1a1 to computing mean absolute error (MAE).
   */
  namespace errt{
    enum ErrT : int {Unset = 0, q2a2 = 1, q2a1 = 2, q1a1 = 3};
  }

  using jymlp::actf::ActF;
  using jymlp::errt::ErrT;

  enum PrettyPrintStyle : int {plaintext = 0, latexeq = 1};

  class Mlp{
  protected:

    /** Linear storage of weights */
    vector<double> weights;

    /** Linear storage of neuron counts, in-hid-out */
    vector<unsigned int> nneur;

    /** Linear storage of activation types */
    vector<ActF> actf;

    /** Re-initialize weights using random values from U(-a,a). */
    void initRnd(double a, SynapticRandomizer & sr);

  public:

    /**
     * Construct uninitialized; only for reading content from a stream
     * very soon.
     */
    Mlp();

    /** Construct with all-zero weights and tanh-tanh-lin activations
        (default). */
    Mlp(const vector<unsigned int>& inneur);

    /** Construct with given weights and actfs. Deep copies are made. */
    Mlp(const vector<unsigned int>& inneur,
        const vector<ActF>& iactf,
        const vector<double>& iweights);

    /** Construct a deep copy of another similar dude. */
    Mlp(const Mlp &other);

    /** Construct with given layer sizes and actfs; initialize the
     *  weights with a given SynapticRandomizer. If you use threads,
     *  initialize the randomizers properly for each thread! */
    Mlp(const vector<unsigned int>& inneur,
        const vector<ActF>& iactf,
        SynapticRandomizer& sr);

    /**
     * Return the number of doubles needed in the external workspace
     * storage that is always required for the layerwise computations.
     */
    size_t getWorkspaceSize();

    /** Return the number of layers as input-hidden-...-hidden-output. */
    size_t getNLayers() const {return nneur.size();}

    /** Return the total number of weights. */
    size_t getNWeights() const {return weights.size();}

    /** Return the number of nodes/neurons on a layer. */
    size_t getNNeurons(int layer) const {return nneur[layer];}

    /** Return the total number of neurons on the hidden layers. */
    size_t getNHiddenNeurons() const;

    /**
     * Return the number of non-zero weights; the count is for strict
     * zeros without numerical tolerance, so small weights should be
     * pruned before this measure is useful.
     */
    size_t getNumNonzeroWeights() const;
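
    /* Usage sketch (illustrative only, not part of the interface):
     * construct a network and reserve the external workspace described
     * in the file header. The layer sizes, the variable names, and the
     * choice to keep the workspace in a std::vector are assumptions.
     *
     *   std::vector<unsigned int> layers = {3, 4, 3};   // a 3-4-3 network
     *   jymlp::Mlp net(layers);                         // tanh-tanh-lin default
     *   std::vector<double> ws(net.getWorkspaceSize()); // external workspace
     *   // ws.data() can now be passed as the workspace pointer of
     *   // forward(), errorVec(), and backwardEucSq() declared below.
     */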

    /**
     * Return the number of connected input nodes, i.e., those which
     * feed into at least one neuron on the next layer.
     */
    size_t getNumConnectedInputs() const;

    /** Return the number of input nodes. */
    size_t getNumInputs() const {return nneur[0];}

    /** Return the number of output nodes. */
    size_t getNumOutputs() const {return nneur[nneur.size()-1];}

    /**
     * Feed an input vector to produce the outputs of each layer; the
     * outputs are stored in the external workspace array for future
     * examination or backpropagation computations. The size of the
     * workspace must be at least numberOfLayers * max(nneur).
     */
    void forward(const vector<double> &input, double * workspace) const;

    /**
     * Evaluate the error vector (N(x)-t) for a target vector; call
     * this after forward() if the error is to be computed instead of
     * only the network outputs; updates the result in the workspace.
     */
    void errorVec(const vector<double> &target, double * workspace) const;

    /** Create and return a copy of the current workspace output
     *  (network output or error vector, depending on the workspace
     *  state). */
    vector<double> copyOutputVec(double * workspace) const;

    /** Return a 1-based index to the class (1,2,...,C) represented by
     *  the output (must be called directly after the forward pass). */
    size_t outputVecAsClassIndex(const double * workspace) const;

#if 0
    /**
     * Add coeff * ||e||^2 to destination, without backpropagation.
     */
    void addEuc2(double coeff, double * dest, double * workspace) const;
#endif

    /**
     * Add the weighted error to the destination value and optionally
     * its gradient by backpropagating through the workspace; must be
     * called after forward() and errorVec() because the error vector
     * must be available in the workspace in order to evaluate the
     * error contribution or backpropagate. Passing nullptr as destG
     * bypasses the backpropagation step, in which case no changes are
     * made to the workspace.
     *
     * FIXME: That means that the function could be overloaded with a
     * const* version for the workspace!!
     */
    void backwardEucSq(double coeff, double * destE, double * destG,
                       double * workspace, ErrT errortype) const;

    /**
     * Add a weight decay term of the form coeff/2 * sum w_i^2, where
     * the sum is over either all weights, or all weights except the
     * biases of the output layer.
     */
    void weightDecaySq(double coeff, double *destE, double *destG,
                       bool excludeOutputBias) const;

    /**
     * Add a weight decay term of the form coeff * sum abs(w_i), where
     * the sum is over either all weights, or all weights except the
     * biases of the output layer.
     */
    void weightDecayAbs(double coeff, double *destE, double *destG,
                        bool excludeOutputBias) const;

    /**
     * Update the weights by adding values multiplied by a coefficient
     * to the current weight values. For example, update(-.001, grad)
     * gives the simplest kind of steepest-descent backprop. The same
     * function can be used for random "jogging", for example
     * update(.1, gaussian).
     */
    void update(double coeff, const double *wupd);
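
    /* Sketch of one steepest-descent training step using the workspace
     * protocol described above (illustrative only; net, x, t, and the
     * learning rate 0.001 are assumptions, not part of the interface):
     *
     *   std::vector<double> ws(net.getWorkspaceSize());
     *   std::vector<double> grad(net.getNWeights(), 0.0);
     *   double err = 0.0;
     *   net.forward(x, ws.data());          // forward pass, done only once
     *   net.errorVec(t, ws.data());         // workspace now holds N(x)-t
     *   net.backwardEucSq(1.0, &err, grad.data(), ws.data(),
     *                     jymlp::errt::q2a2);
     *   net.update(-0.001, grad.data());    // w <- w - 0.001 * grad
     */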

    /**
     * Evaluate and add to the referred outputs the squared or
     * non-squared error and the partial (sub)derivatives w.r.t. the
     * weights, multiplied by a coefficient, for a given input and a
     * given target vector.
     *
     * This is likely to be the innermost computation in algorithmic
     * iteration, and thus the function that dominates the
     * computational cost. Therefore, exceptionally, the output
     * parameters are raw pointers to numerical vectors, and a nullptr
     * may be passed for any value that is not needed for the current
     * purpose.
     *
     * Some profiling should be done to make sure this really is the
     * place to optimize and whether useful optimizations can be done.
     *
     * FIXME: Split this into (1) forward eval, (2) error computation,
     * (3) backward loop.
     */
    void addErrorAndGradient(const vector<double> &input,
                             const vector<double> &target,
                             double coefficient,
                             double * sqerror, double * error,
                             vector<double> * mseGrad,
                             vector<double> * meeGrad) const;

    /** Stream to an ASCII version, easily readable in simple scripts,
     *  e.g., in Matlab or Octave.
     *
     *  Format: Output always begins with the format version. Currently
     *  only "version 1" is implemented.
     *
     *  Version 1 stream (values packed & separated with a single space):
     *
     *    int              version,
     *    int              number of layers (nlay),
     *    int[nlay]        number of neurons on each layer,
     *    ActF[nlay]       activation functions (input activation must
     *                     always be Unset == 0),
     *    double[nweights] synaptic weights as a row-major linear array.
     *
     *  A round-trip usage sketch is given at the end of this file.
     */
    void toStream(ostream & o);

    /** Set new values from an ASCII stream, as created by toStream(). */
    void fromStream(istream & ins);

    /** Pretty-print layer sizes, e.g., "3-4-3". */
    string prettyPrintLayerSizes();

    /** Pretty-print layer activations, e.g., "in-tanh-tanh-lin". */
    string prettyPrintLayerActivations();

    /** Pretty-print weight matrices. */
    string prettyPrintWeights() const;

    /** Pretty-print gradient matrices. */
    string prettyPrintGradient(const vector<double> & grad) const;

    /** Pretty-print everything. */
    string prettyPrint();
  };
}

#endif /* MLP_HPP_ */
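
/* Serialization round-trip sketch (illustrative only; the stream and
 * variable names are assumptions). The ASCII version written by
 * toStream() can be read back with fromStream(), or parsed in Matlab or
 * Octave as a plain whitespace-separated numeric vector.
 *
 *   #include <sstream>
 *
 *   std::ostringstream out;
 *   net.toStream(out);                 // "1 <nlay> <neurons> <actfs> <weights>"
 *   jymlp::Mlp copy;                   // uninitialized, filled from the stream
 *   std::istringstream in(out.str());
 *   copy.fromStream(in);
 */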