#ifndef DATASET_HPP_
#define DATASET_HPP_

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

using namespace std;
/**
 * Reads a full dataset; limited to those that fit in memory. A
 * pointer to this must be shared among all users. As of yet, works
 * only for classification datasets, where the last value on each row
 * is the integral class label and all other values are real-valued
 * features.
 *
 * Every standard-library name in this declaration carries an explicit
 * std:: qualifier, so the class does not depend on a using-directive
 * being in effect at the point of inclusion.
 */
class Dataset {
protected:
    std::vector<std::vector<double>> invec;      ///< input vectors
    std::vector<std::size_t> targetc;            ///< target classes 1,2,...
    std::vector<std::vector<double>> prototypes; ///< target prototypes
    std::size_t nclasses;                        ///< number of classes; discovered upon load
    std::vector<std::size_t> ninc;               ///< count of rows in each class 1,2,...; ninc[0] unused

public:
    /** No default construction; use the file-reading constructor. */
    Dataset() = delete;

    /**
     * Read from a file. TODO: differentiate classification / approx?
     *
     * @param fname name of the file to read from
     * @param enc0  NOTE(review): presumably the real value used for
     *              non-class elements of a prototype vector -- confirm
     *              against the implementation
     * @param enc1  NOTE(review): presumably the real value used for
     *              class elements of a prototype vector -- confirm
     *              against the implementation
     */
    Dataset(std::string fname, double enc0, double enc1);

    /**
     * Look at an indexed row of data. TODO: Likely to replace this
     * with iterators that can be generated for splits. Or, rather,
     * objects of type DataSplit.
     *
     * @param i row index
     * @return read-only reference to the row's values
     */
    const std::vector<double> & row(std::size_t i) const;

    /** Return number of classes. TODO: classification vs. approx? */
    std::size_t getNClasses() const;

    /** Return number of rows / instances. */
    std::size_t getNRows() const;

    /**
     * Return target class 1..N. NOTE: class indices are base 1 while
     * data indices are base 0!
     *
     * @param ind data index (base 0)
     */
    std::size_t getTargetClass(std::size_t ind) const;

    /**
     * Return the number of rows that belong to a given class.
     *
     * @param c class index (base 1, per the class-index convention
     *          noted on getTargetClass)
     */
    std::size_t getNRowsInClass(std::size_t c) const;

    /**
     * Return a binary vector encoding the target class; the real
     * values for the encoding must be given as a constructor
     * parameter.
     *
     * NOTE(review): this comment previously stated "default is -1 for
     * non-class elements and +1 for class elements", but the
     * constructor declared in this class has no default arguments --
     * confirm where, if anywhere, those defaults are applied.
     *
     * Unlike the other accessors, this member is not const-qualified.
     *
     * @param row NOTE(review): presumably a data row index -- confirm
     */
    const std::vector<double> & prototype(std::size_t row);

    /** Dump data to a stream. */
    void toStream(std::ostream & ost) const;
};

#endif