#include <vector>
#include <memory>
#include <random>
#include <iostream>
#include <utility>
#include <stdexcept>
#include <limits>

#include "MlpIndividual.hpp"
#include "Individual.hpp"
//#include "MLPIndividualFactory.hpp"
#include "Mlp.hpp"
#include "Dataset.hpp"
#include "Random.hpp"

using namespace std;

MlpBackpropUnaryOperator::MlpBackpropUnaryOperator(size_t iters, double step,
                                                   const vector<double> & classweights,
                                                   bool rerandomize, bool applyssw, bool mee){
  _iters = iters;
  _step = step;
  _preErr = numeric_limits<double>::infinity();
  _incRat = 1.05;   // FIXME: Take this from init. Is this useful, after all?
  _decRat = .95;    // FIXME: Take this from init. Is this useful, after all?
  _classweights = classweights;
  _rerandomize = rerandomize;
  _sswpen = 0.0;    // FIXME: Think about the weight penalty thing.
  _applyssw = applyssw;
  _mee = mee;
}

void MlpBackpropUnaryOperator::operate(MlpIndividual & mi, mt19937 * mt){
  vector<double> ws(mi.mlp->getWorkspaceSize());
  vector<double> grad(mi.mlp->getNWeights());

  /* Could be part of the improver, too? */
  double multip = 1.0 / mi.ds->getNRows(); // this accounts for 1/N
                                           // TODO: could ds or selection be a
                                           // property of the improver object?

  // FIXME: Move to some UnaryOperator class?
  uniform_real_distribution<double> u(0., .01);
  _sswpen = u(*mt); // FIXME: Hardcoded, unchanging!!

  // If we want to aim randomly every time, we need a stochastic element here:
  if (_rerandomize){
    randomize1(_classweights, mt);
  }

  /* Are we "robust" MEE or "standard" MSE? */
  jymlp::errt::ErrT et = (_mee) ? (jymlp::errt::q2a1) : (jymlp::errt::q2a2);

  /** FIXME: Placeholder hack here, totally: */
  // We remain MEE only 50% of the time!
  if (_mee) et = (u(*mt) > .005) ? (jymlp::errt::q2a1) : (jymlp::errt::q2a2);

  for(size_t iterations = 0; iterations < _iters; ++iterations){
    for (double& d: grad) d = 0.0;
    double err = 0.0;

    // Evaluate cumulative error and gradient.
    for (size_t i = 0; i < mi.ds->getNRows(); ++i){
      mi.mlp->forward(mi.ds->row(i), ws.data());
      mi.mlp->errorVec(mi.ds->prototype(i), ws.data());
      mi.mlp->backwardEucSq(_classweights[mi.ds->getTargetClass(i)-1]*multip,
                            &err, grad.data(), ws.data(), et);
    }

    // FIXME: Hardcoded application of weight decay:
    if (_applyssw){
      mi.mlp->weightDecaySq(_sswpen/mi.mlp->getNWeights(), &err, grad.data(), true);
    }

    //cerr << "We have error " << err << endl;
    //cerr << "We have grad "; for (auto g: grad) cerr << g << " "; cerr << endl;
    //cerr << mlp->prettyPrintGradient(grad);
    //cerr << "Have now done an MLP eval" << endl;
    //_step *= (err > _preErr) ? _decRat : _incRat;

    mi.mlp->update(-_step, grad.data());

#ifdef ADAPTIVE
    // Adaptive? Should re-compute error, though.. and take a step
    // back upon failure..
    _preErr = err;

    // Evaluate cumulative error again, without touching the gradient.
    err = 0.0;
    for (size_t i = 0; i < mi.ds->getNRows(); ++i){
      mi.mlp->forward(mi.ds->row(i), ws.data());
      mi.mlp->errorVec(mi.ds->prototype(i), ws.data());
      mi.mlp->backwardEucSq(_classweights[mi.ds->getTargetClass(i)-1]*multip,
                            &err, nullptr, ws.data(), et);
    }
    if (err > _preErr) {
      // We won't allow worsening, ever.
      mi.mlp->update(_step, grad.data());
      _step *= _decRat;
    } else {
      _step *= _incRat;
    }
#endif
  }
}
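/* A minimal, disabled usage sketch: how an improver like the one above is
 * typically attached to an individual and then run. The individual `mi`, its
 * dataset `ds`, the iteration count and the step length are assumed example
 * values, not part of this file; addUnaryOperator() and improve() are defined
 * further below in MlpIndividual. */
#if 0
vector<double> classweights(ds->getNClasses(), 1.0); // equal class weights (assumed)
mi.addUnaryOperator(unique_ptr<MlpBackpropUnaryOperator>(
    new MlpBackpropUnaryOperator(100,            // iters: backprop iterations
                                 0.01,           // step:  gradient step length
                                 classweights,
                                 false,          // rerandomize class weights?
                                 false,          // apply sum-of-squared-weights penalty?
                                 false)));       // use MEE instead of MSE?
mt19937 mt(12345);
mi.improve(&mt); // runs MlpBackpropUnaryOperator::operate(mi, &mt)
#endif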
/* They want an uninitialized MLP; then they shall have it.. */
MlpIndividual::MlpIndividual() {
  mlp = unique_ptr<jymlp::Mlp>(new jymlp::Mlp());
}

MlpIndividual::MlpIndividual(const MlpObjectives& mobj, unique_ptr<jymlp::Mlp> imlp,
                             shared_ptr<Dataset> ids)
  : Individual(mobj.nobj()) {
  mlp = move(imlp);
  ds = ids;
  objkind = mobj.kinds();
}

MlpIndividual::MlpIndividual(const MlpIndividual & original) : Individual(original) {
  mlp = unique_ptr<jymlp::Mlp>(new jymlp::Mlp(*original.mlp));
  ds = original.ds;
  objkind = original.objkind;
  // Guard against copying a default-constructed individual that has no improver:
  if (original.improver){
    improver = unique_ptr<MlpBackpropUnaryOperator>(
        new MlpBackpropUnaryOperator(*original.improver));
  }
}

/** Default destructor */
MlpIndividual::~MlpIndividual(){
  // No need to destroy explicitly? (Check if this leaks).
}

Individual * MlpIndividual::clone(){
  return new MlpIndividual(*this);
}

void MlpIndividual::addUnaryOperator(unique_ptr<MlpBackpropUnaryOperator> uop){
  improver = move(uop);
}

#if 0
// FIXME: Not needed here as of yet.. maybe ever?
static double squared_distance(const vector<double> &x, const vector<double> &y){
  double res = 0.;
  for(int i = 0; i < (int)x.size(); ++i){
    res += (x[i]-y[i]) * (x[i]-y[i]);
  }
  return res;
}
#endif

static double divide_and_return_min(vector<double> &ersum, const shared_ptr<Dataset> ds){
  // Adjust to classwise means and find the minimum:
  size_t imin = 0;
  for(size_t i = 0; i < ds->getNClasses(); ++i){
    ersum[i] /= ds->getNRowsInClass(i+1);
    if (ersum[i] < ersum[imin]) imin = i;
  }
  return ersum[imin];
}

static double just_sum(const vector<double> &v){
  double tot = 0.0;
  for (auto const & d : v) tot += d;
  return tot;
}

void MlpIndividual::evaluate(){
  /* FIXME: At some point, we'll probably want interchangeable evaluator
     objects! (maybe..) To reduce redundant computations, this would require a
     more involved object structure with global coordination. As of now, I'm
     "keeping it stupidly simple" to the outside world by having a joint
     evaluator function for everything. As a downside, this evaluator function
     is not so simple internally.. */
  vector<double> ws(mlp->getWorkspaceSize());

#if 0
  //double multip = 1.0 / ds->getNRows(); // this accounts for 1/N
  double mse = 0.0; // used only when MSE is selected
  double mee = 0.0; // used only when MEE is selected
  double mae = 0.0; // used only when MAE is selected
#endif

  // Zero-init all objectives. We'll cumulate sums for most:
  for (double &d: _objectives) d = 0.0;

  /* We do a single pass over the dataset, computing all of the
   * requested data-related objectives. Each is used only when the
   * respective objectives are selected: */
  vector<double> cwerr(ds->getNClasses(), 0.);
  vector<double> cwmse(ds->getNClasses(), 0.);
  vector<double> cwmee(ds->getNClasses(), 0.);
  vector<double> cwmae(ds->getNClasses(), 0.);

  for (size_t i = 0; i < ds->getNRows(); ++i){
    size_t tclass = ds->getTargetClass(i);

    /* We always do the forward pass: */
    mlp->forward(ds->row(i), ws.data());

    /* After the fw pass, evaluation depends on the objective selection. */
    /* Accumulate continuous error measures (sums at this point): */
    if (objkind & (okind::mse|okind::mee|okind::mae
                   |okind::cwmse|okind::cwmee|okind::cwmae)) {
      mlp->errorVec(ds->prototype(i), ws.data());
    }
    if (objkind & (okind::mse|okind::cwmse)) {
      mlp->backwardEucSq(1.0, &cwmse[tclass-1], nullptr, ws.data(), jymlp::errt::q2a2);
    }
    if (objkind & (okind::mee|okind::cwmee)) {
      mlp->backwardEucSq(1.0, &cwmee[tclass-1], nullptr, ws.data(), jymlp::errt::q2a1);
    }
    if (objkind & (okind::mae|okind::cwmae)) {
      mlp->backwardEucSq(1.0, &cwmae[tclass-1], nullptr, ws.data(), jymlp::errt::q1a1);
    }

    /* Accumulate discrete error measures (sums at this point): */
    if (objkind & (okind::cwerr|okind::err|okind::mincwe)) {
      size_t pclass = mlp->outputVecAsClassIndex(ws.data());
      if (tclass != pclass){
        cwerr[tclass-1] += 1.0; // Cumulate the number of classwise errors
      }
    }
  }
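  /* At this point cwmse/cwmee/cwmae hold per-class error sums and cwerr holds
   * per-class misclassification counts; they are turned into classwise or
   * overall means below as each selected objective is written out. */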
  /* Insert the objectives at their proper indices. Look carefully at
     MlpObjectives to keep the order the same! The indices vary with the
     number of classification classes! */
  size_t insert_loc = 0;

  /* Classwise error measures */
  if (objkind & okind::cwmse){
    for(size_t i = 0; i < ds->getNClasses(); ++i)
      _objectives[insert_loc++] = cwmse[i]/ds->getNRowsInClass(i+1);
  }
  if (objkind & okind::cwmee){
    for(size_t i = 0; i < ds->getNClasses(); ++i)
      _objectives[insert_loc++] = cwmee[i]/ds->getNRowsInClass(i+1);
  }
  if (objkind & okind::cwmae){
    for(size_t i = 0; i < ds->getNClasses(); ++i)
      _objectives[insert_loc++] = cwmae[i]/ds->getNRowsInClass(i+1);
  }
  if (objkind & okind::cwerr){
    for(size_t i = 0; i < ds->getNClasses(); ++i)
      _objectives[insert_loc++] = cwerr[i]/ds->getNRowsInClass(i+1);
  }

  /* Single error measures */
  if (objkind & okind::mse){ _objectives[insert_loc++] = just_sum(cwmse)/ds->getNRows(); }
  if (objkind & okind::mee){ _objectives[insert_loc++] = just_sum(cwmee)/ds->getNRows(); }
  if (objkind & okind::mae){ _objectives[insert_loc++] = just_sum(cwmae)/ds->getNRows(); }
  if (objkind & okind::err){ _objectives[insert_loc++] = just_sum(cwerr)/ds->getNRows(); }

  /* Minimum classwise error measures. NOTE: These functions modify
   * the originals to compute classwise means: */
  if (objkind & okind::mincwmse){ _objectives[insert_loc++] = divide_and_return_min(cwmse, ds); }
  if (objkind & okind::mincwmee){ _objectives[insert_loc++] = divide_and_return_min(cwmee, ds); }
  if (objkind & okind::mincwmae){ _objectives[insert_loc++] = divide_and_return_min(cwmae, ds); }
  if (objkind & okind::mincwe){ _objectives[insert_loc++] = divide_and_return_min(cwerr, ds); }

  /* Continuous MLP complexity measures */
  if (objkind & okind::msw){
    double ssw = 0.0;
    // with weight 2.0 because of the .5 in the equation:
    mlp->weightDecaySq(2.0, &ssw, nullptr, false);
    _objectives[insert_loc++] = ssw/mlp->getNWeights();
  }
  if (objkind & okind::mswnb){
    double ssw = 0.0;
    // with weight 2.0 because of the .5 in the equation:
    mlp->weightDecaySq(2.0, &ssw, nullptr, true);
    _objectives[insert_loc++] = ssw/(mlp->getNWeights() - mlp->getNumOutputs());
  }
  if (objkind & okind::maw){
    double saw = 0.0;
    mlp->weightDecayAbs(1.0, &saw, nullptr, false);
    _objectives[insert_loc++] = saw/mlp->getNWeights();
  }
  if (objkind & okind::mawnb){
    double saw = 0.0;
    mlp->weightDecayAbs(1.0, &saw, nullptr, true);
    _objectives[insert_loc++] = saw/(mlp->getNWeights() - mlp->getNumOutputs());
  }

  /* Discrete MLP complexity measures */
  if (objkind & okind::nhid){ _objectives[insert_loc++] = mlp->getNHiddenNeurons(); }
  if (objkind & okind::nnzw){ _objectives[insert_loc++] = mlp->getNumNonzeroWeights(); }
  if (objkind & okind::ninput){ _objectives[insert_loc++] = mlp->getNumConnectedInputs(); }

  /* Measures based on a validation set (not yet implemented) */
  if (objkind & okind::vsmse){ throw runtime_error("Not implemented: vsmse"); }
  if (objkind & okind::vsmee){ throw runtime_error("Not implemented: vsmee"); }
  if (objkind & okind::vsmae){ throw runtime_error("Not implemented: vsmae"); }
  if (objkind & okind::vserr){ throw runtime_error("Not implemented: vserr"); }
}

vector<double> MlpIndividual::getTrace(){
  vector<double> res = Individual::getTrace();
  // FIXME: This is just a mock-up, before the optional tracing finds its
  // proper place in the implementation. Compute a sum of weights (all weights
  // included here, biases and others). NOTE: despite the name `saw`, this
  // currently calls weightDecaySq, so it is a sum of squared weights:
  double saw = 0.0;
  mlp->weightDecaySq(1.0, &saw, nullptr, false);
  res.push_back(saw);
  return res;
}
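/* A disabled sketch of what mutate() could eventually do: perturb every weight
 * with small Gaussian noise. It assumes that Mlp::update(c, v) adds c*v to the
 * weight vector (the same call operate() uses for its gradient step); the noise
 * scale `sigma` is an arbitrary placeholder. The current placeholder
 * implementation follows right below. */
#if 0
void MlpIndividual::mutate() {
  static thread_local mt19937 mt(random_device{}());
  normal_distribution<double> n(0.0, 1.0);
  const double sigma = 0.01;             // placeholder mutation strength
  vector<double> noise(mlp->getNWeights());
  for (double &d : noise) d = n(mt);
  mlp->update(sigma, noise.data());      // w += sigma * noise (assumed semantics)
}
#endif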
void MlpIndividual::mutate() {
  cerr << "WARNING: Unimplemented feature MlpIndividual::mutate() - nothing done."
       << endl;
}

void MlpIndividual::improve(mt19937 * mt) {
  improver->operate(*this, mt);
}

// FIXME: Science will happen here (and just a couple of other places)
pair<unique_ptr<Individual>, unique_ptr<Individual> >
MlpIndividual::crossWith(Individual & other) {
  // First, make children a & b that look exactly like mum and dad:
  unique_ptr<MlpIndividual> a(new MlpIndividual(*this));
  unique_ptr<MlpIndividual> b(new MlpIndividual((MlpIndividual&)other));
  // Then use the member function, and return a pair:
  a->crossWith(*b);
  return pair<unique_ptr<Individual>, unique_ptr<Individual> >(move(a), move(b));
}

// FIXME: Science will happen here (and just a couple of other places)
void MlpIndividual::crossWith(MlpIndividual &other){
  other.getRank();
  cerr << "WARNING: Unimplemented feature MlpIndividual::crossWith() - nothing done."
       << endl;
}

void MlpIndividual::impl_to_stream(ostream & repr) {
  mlp->toStream(repr);
}

void MlpIndividual::impl_from_stream(istream & repr) {
  mlp->fromStream(repr);
}

Individual * MlpIndividualReader::fromStream(istream & ist) const {
  Individual *ni = new MlpIndividual();
  ni->fromStream(ist);
  return ni;
}
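/* A disabled round-trip sketch for the stream I/O above. It assumes the
 * Individual base class exposes public toStream()/fromStream() wrappers that
 * delegate to impl_to_stream()/impl_from_stream(): fromStream() is used by the
 * reader above, toStream() is inferred from the naming convention. <sstream>
 * would be needed for the buffer. */
#if 0
stringstream buf;
someIndividual.toStream(buf);                         // serialize the MLP

MlpIndividualReader reader;
unique_ptr<Individual> copy(reader.fromStream(buf));  // read back a fresh MlpIndividual
#endif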