/*
 * NOTE(review): this copy of the DataSet source is damaged. Restore it from
 * version control before editing or compiling it.
 *
 * Observed damage (all visible in the text below):
 *
 *  - Line breaks are gone. The whole class sits on two physical lines, so the
 *    line comment "// (fixed seed)" swallows everything after it on the first
 *    line, and "// Apply the same to test vectors." swallows the rest of the
 *    second line. As written, the file cannot compile.
 *
 *  - Several spans are missing. Each gap looks like text that started at a
 *    less-than sign and ended at a greater-than sign was dropped, leaving
 *    malformed fragments such as:
 *        for (int i=0;i0;i--){
 *        for (int i=0;imaxt) maxt=truth[i];
 *        for (int i=0;inum) num++;
 *        for(int i=0;i origMax[i]) origMax[i] = trainvec[v][i];
 *        for(int i=0;i 0){
 *    The ArrayList declarations in dlmread carry no type arguments; presumably
 *    these were lost the same way -- TODO confirm against the original.
 *
 *  - Names used without a visible declaration: splitToTrainAndTest(...),
 *    getVecSize(), trainRatio, truth, maxt, num, v. Their definitions
 *    presumably sit in the missing spans -- TODO confirm.
 *
 * What the surviving fragments show:
 *
 *  - rg is a shared java.util.Random created with the fixed seed 123.
 *  - Fields: attribute count (nattrib), row counts (n, ntrain, ntest), the full
 *    data and labels arrays, separate train/test vector and label arrays, and a
 *    private indexPermutation array.
 *  - clearPermutation() allocates indexPermutation with length n. The rest of
 *    its body is missing.
 *  - A fragment of another method walks i downwards, swaps indexPermutation[i]
 *    with the entry at index rg.nextInt(i+1), and then calls
 *    splitToTrainAndTest(trainRatio). Its signature is missing.
 *  - dlmread(fname, labelInd) opens fname via Scanner over a FileInputStream,
 *    reads line by line, splits each line on ",", and takes the attribute count
 *    from the first line's field count. Its locals n and nattrib shadow the
 *    fields of the same name. How labelInd is used, and whether the Scanner is
 *    ever closed, cannot be determined from the visible text.
 *  - A fragment finds the largest value in truth and allocates a square int
 *    matrix cm of size maxt+1. A later fragment increments num under a
 *    comparison and returns it. Both method headers are missing.
 *  - normalize(min, max) allocates per-attribute origMin/origMax/origMean
 *    arrays sized by getVecSize() and accumulates the maximum and mean over
 *    trainvec. Under a condition whose left-hand side is missing ("... 0") it
 *    divides each train value by max(|origMax[j]|, |origMin[j]|). It then maps
 *    the value with min + .5 * (value+1) * (max-min). The same two steps are
 *    applied to testvec using the statistics taken from trainvec. How origMin
 *    is updated and where origMean is applied are not visible.
 */
import java.io.FileInputStream; import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.Random; import java.util.Scanner; /** * */ /** * I just very quickly made this class to help with loading example datasets. * No time to write comments, and I just stuffed this one-by-one with what I * happened to need to make the hands-on demonstrations to work right here * and now... * * @author nieminen * */ public class DataSet { static final Random rg = new Random(123); // (fixed seed) int nattrib; int n; int ntrain,ntest; double data[][]; int labels[]; double trainvec[][]; int trainlabel[]; double testvec[][]; int testlabel[]; private int[] indexPermutation; private void clearPermutation() { indexPermutation = new int[n]; for (int i=0;i0;i--){ int swap = rg.nextInt(i+1); int tmp = indexPermutation[swap]; indexPermutation[swap] = indexPermutation[i]; indexPermutation[i] = tmp; } splitToTrainAndTest(trainRatio); } public void dlmread(String fname, int labelInd) throws FileNotFoundException { ArrayList readdata = new ArrayList(); ArrayList readlabels = new ArrayList(); Scanner sc = new Scanner(new FileInputStream(fname)); int n = 0; int nattrib = 0; while(sc.hasNextLine()){ String line = sc.nextLine(); String[] spl = line.split(","); if (n==0) nattrib = spl.length; n++; for (int i=0;imaxt) maxt=truth[i]; } int[][] cm = new int[maxt+1][maxt+1]; for (int i=0;inum) num++; } return num; } /** Analyses training data to scale it thus: mean value is between min * and max, and no value is outside [min,max]. Applies the same * transformation to test data. Test data may lie outside [min,max]. * This is "realistic" since test data should be "unseen" before training * takes place. At least I intended to do such routine. May have bugs, I'm * pouring code quite fast at the moment, to get these done before the * hands-on sessions. 
*/ public void normalize(double min, double max) { double[] origMin = new double[getVecSize()]; double[] origMax = new double[getVecSize()]; double[] origMean = new double[getVecSize()]; for(int i=0;i origMax[i]) origMax[i] = trainvec[v][i]; origMean[i] += trainvec[v][i]; } origMean[i] /= trainvec.length; } for(int i=0;i 0){ trainvec[i][j] /= Math.max(Math.abs(origMax[j]), Math.abs(origMin[j])); } trainvec[i][j] = min + .5 * (trainvec[i][j]+1)*(max-min); } } // Apply the same to test vectors. for(int i=0;i 0){ testvec[i][j] /= Math.max(Math.abs(origMax[j]), Math.abs(origMin[j])); } testvec[i][j] = min + .5 * (testvec[i][j]+1)*(max-min); } } } }