/*************************************************************************************
    DBCountPreprocessor.java
    Preprocesses data ready to calculate a count diamond.  No measures.
    Copyright (C) 2008  Hazel Webb hazel.webb@unb.ca

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
**/

/**
 * DBCountPreprocessor.java
 * input: 	-f fname: csv fact table (no headers or metadata)
 *               -d d:  number of dimensions
 *               [ -n:  normalized binary output]
 *        and/or [ -s:  hash table(Key, Count) character output]
 *               [ -v:  verbose output]
 *
 * output:   1)	array file first line indicates number of dims
 *		           second line cardinalities for each dim
 *			   remaining lines counts for each attr
 *	     2)	binary file data stored as packed ints (no eol to indicate new fact)
 *	    	normalizes data starting from 0
 *
 *           3) hash table files suitable for reading by DBCount.java as an option
 *
 * It is expected that the output file will be used by DBCount_binary or DBCount to process for K.
 *
 * May 6, 2008

 * changes made to standardize interface and provide optional output January 2009
 * Hazel Webb
 ******************************************************************************************/
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.*;

/**
 * Preprocesses a CSV fact table so that a count diamond can be computed from it.
 *
 * <p>Depending on the flags given on the command line the class produces:
 * <ul>
 *   <li>{@code -n}: {@code <file>_array} (binary: number of dims, cardinality of each
 *       dim, then the count of every normalized attribute value),
 *       {@code <file>_array_bak} (a copy of it), {@code <file>.norm} (the facts as
 *       packed ints) and {@code <file-without-extension>_normalized} (the same facts
 *       as text);</li>
 *   <li>{@code -s}: {@code <file>_COUNT} and {@code <file>_COUNT.full} (text:
 *       attribute value and its count, per dimension).</li>
 * </ul>
 * {@code <file>-string-to-norm} (attribute value to normalized integer) is always
 * created, but only receives entries when {@code -n} is given.
 */
public class DBCountPreprocessor {

    /** Command-line synopsis printed for -h and for malformed invocations. */
    private static final String USAGE =
        "Usage: DBCountPreprocessor -f filename -d dims -n AND/OR -s [-v verbose]";

    private int d;
    //number of data dims
    private static boolean v = false;
    //noisy output
    private String file;
    //store input file name
    private List<List<Integer>> data;
    //per dimension: counts indexed by normalized attribute value
    private List<Hashtable<String,Integer>> mappedCoords;
    //per dimension: maps attribute values to their counts
    //(kept as Hashtable so the entry order written to the _COUNT files is unchanged)
    private List<Map<String,Integer>> mappedNorms;
    //per dimension: maps attribute values to their normalized integers
    private static boolean norm = false;
    //request normalized output
    private static boolean hash = false;
    //request hashtable character output

    /**
     * Command-line entry point.
     *
     * <p>Recognized arguments: {@code -f}/{@code -filename} file, {@code -d} dims,
     * {@code -n} (normalized output), {@code -s} (hash table output), {@code -v}
     * (verbose), {@code -h} (print usage and exit). Any malformed command line
     * prints a diagnostic plus the usage text and exits with status 1.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        if (args.length < 4) {
            System.out.println(USAGE);
            System.exit(1);
        }

        DBCountPreprocessor proc = new DBCountPreprocessor();
        for (int a = 0; a < args.length; a++) {
            String flag = args[a];
            switch (flag) {
            case "-filename":
            case "-f":
                if (a + 1 >= args.length) {
                    usageError("Missing value for argument: " + flag);
                    return;
                }
                proc.file = args[++a];
                break;
            case "-d":
                if (a + 1 >= args.length) {
                    usageError("Missing value for argument: " + flag);
                    return;
                }
                try {
                    proc.d = Integer.parseInt(args[++a]);
                } catch (NumberFormatException e) {
                    usageError("Problem with argument: " + args[a]);
                    return;
                }
                break;
            case "-n":
                norm = true;
                break;
            case "-s":
                hash = true;
                break;
            case "-h":
                System.out.println(USAGE);
                System.exit(0);
                break;
            case "-v":
                v = true;
                break;
            default:
                usageError("Problem with argument: " + flag);
                return;
            }
        }

        if (proc.file == null || proc.d <= 0) {
            usageError("Both -f filename and -d dims (a positive integer) are required");
            return;
        }

        //timing starts
        long startTime = System.currentTimeMillis();

        // createHashTables processes for normalized and/or character data by checking
        // the truth value of norm and hash
        proc.createHashTables(proc.file, proc.d);

        //only write the hash table files if requested
        if (hash) {
            proc.writeHashTables(proc.file);
        }

        //only write the normalized files and arrays if requested
        if (norm) {
            proc.writeArrays(proc.file);
            proc.writeNormFile(proc.file, proc.file + ".norm");
        }

        long endTime = System.currentTimeMillis();
        double time = (endTime - startTime) / 1000.0;
        System.out.println("Time for preprocessing:  " + time + " seconds");
        //	proc.checkFile(proc.file);
        //sanity check only use on small files - it prints everything
    }

    /** Prints a diagnostic and the usage text to standard error, then exits with status 1. */
    private static void usageError(String message) {
        System.err.println(message);
        System.err.println(USAGE);
        System.exit(1);
    }

    /** Fails fast when an output method is called before createHashTables has built the tables. */
    private void requireTables() {
        if (data == null || mappedCoords == null || mappedNorms == null) {
            throw new IllegalStateException("createHashTables must be called before writing output");
        }
    }

    /**
     * Returns {@code name} without its extension. Only a dot inside the last path
     * component (and not at its very start) counts as an extension separator, so
     * names without an extension and paths such as {@code ./facts} are returned
     * unchanged.
     */
    private static String stripExtension(String name) {
        int lastSeparator = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
        int lastDot = name.lastIndexOf('.');
        return lastDot > lastSeparator + 1 ? name.substring(0, lastDot) : name;
    }

    /**
     * Splits one non-empty CSV row into its fields.
     *
     * <p>The limit of -1 keeps trailing empty fields; a plain {@code split(",")}
     * silently drops them, which made rows ending in an empty attribute look short.
     *
     * @param line       the row to split
     * @param lineNumber 1-based line number, used only for the error message
     * @return the fields of the row; at least {@code d} entries
     * @throws IllegalArgumentException if the row has fewer than {@code d} fields
     */
    private String[] splitFields(String line, int lineNumber) {
        String[] fields = line.split(",", -1);
        if (fields.length < d) {
            throw new IllegalArgumentException("Line " + lineNumber + " has " + fields.length
                                               + " field(s) but " + d + " dimension(s) were requested: " + line);
        }
        return fields;
    }

    /**
     * Writes the per-dimension counts to the binary file {@code fname + "_array"}
     * and copies that file to {@code fname + "_array_bak"}.
     *
     * <p>The file consists of packed big-endian ints:
     * numberOfDimensions, dim0Size ... dimNSize, then the attribute counts for
     * dim0, the attribute counts for dim1, and so on.
     *
     * @param fname name of the fact table; the output names are derived from it
     * @throws IllegalStateException if createHashTables has not been called
     */
    public void writeArrays(String fname) {
        requireTables();
        String arrayName = fname + "_array";
        try {
            try (DataOutputStream dataOut = new DataOutputStream(
                         new BufferedOutputStream(new FileOutputStream(arrayName)))) {
                //number dims
                dataOut.writeInt(d);
                //dim sizes
                for (int i = 0; i < d; ++i) {
                    dataOut.writeInt(data.get(i).size());
                }
                //counts, one dimension after the other
                for (int i = 0; i < d; ++i) {
                    for (int count : data.get(i)) {
                        dataOut.writeInt(count);
                    }
                }
            }

            //now make a back-up copy for use later when searching for kappa;
            //done in-process so it works on every platform and with any file name
            String backupName = arrayName + "_bak";
            System.out.println("copying: " + arrayName + " -> " + backupName);
            Files.copy(Paths.get(arrayName), Paths.get(backupName), StandardCopyOption.REPLACE_EXISTING);
        } catch (IOException e) {
            System.out.println("IO exception = " + e);
        }
    }

    /**
     * Prints every int stored in {@code fname + "_array"} to standard out so the
     * result of writeArrays can be verified by eye.
     * WARNING: this method prints everything. Use with care!
     *
     * @param fname name of the fact table whose array file is to be dumped
     */
    public void checkFile(String fname) {
        if (!norm) {
            System.out.println("Array option not chosen so validation not possible");
            return;
        }

        try (DataInputStream dataIn = new DataInputStream(
                     new BufferedInputStream(new FileInputStream(fname + "_array")))) {
            while (true) {
                int value;
                try {
                    value = dataIn.readInt();
                } catch (EOFException eof) {
                    System.out.println("End of File");
                    break;
                }
                // Print out the integers.
                System.out.printf("%3d \t", value);
            }
        } catch (IOException e) {
            System.out.println("IO Exception =: " + e);
        }
    }

    /**
     * Reads the CSV fact table and builds the in-memory tables.
     *
     * <p>For each of the {@code d} dimensions this (re)creates:
     * a table of (attribute, count) pairs, filled when hash output was requested;
     * a table of (attribute, normalized value) pairs and a list of counts indexed
     * by normalized value, both filled when normalized output was requested.
     * Every newly assigned normalized value is also recorded in
     * {@code fname + "-string-to-norm"}.
     *
     * <p>The CSV file should have no headers or metadata. Empty lines are skipped.
     *
     * @param fname name of the csv fact table to process
     * @param d     number of dimensions (leading columns) to process
     * @throws IllegalArgumentException if {@code d} is negative or a row has fewer
     *                                  than {@code d} fields
     */
    public void createHashTables(String fname, int d) {
        if (d < 0) {
            throw new IllegalArgumentException("Number of dimensions must not be negative: " + d);
        }

        // start from a clean slate so the tables always match this d and this file
        this.d = d;
        data = new ArrayList<>(d);
        mappedCoords = new ArrayList<>(d);
        mappedNorms = new ArrayList<>(d);
        for (int i = 0; i < d; i++) {
            data.add(new ArrayList<Integer>());
            mappedCoords.add(new Hashtable<String,Integer>());
            mappedNorms.add(new HashMap<String,Integer>());
        }

        // map all of the attributes to their counts
        try (PrintWriter permanentOut = new PrintWriter(
                     new BufferedWriter(new FileWriter(fname + "-string-to-norm")));
             BufferedReader raw = new BufferedReader(
                     new InputStreamReader(new FileInputStream(fname)))) {

            //normalized value to hand out to the next unseen key in each dimension
            int[] nextNorm = new int[d];
            int lineNumber = 0;
            String line;

            while ((line = raw.readLine()) != null) {
                lineNumber++;
                if (v) {
                    System.out.println("Line read: " + line);
                }
                if (line.isEmpty()) {
                    continue;   //blank lines carry no fact
                }
                String[] atts = splitFields(line, lineNumber);

                for (int k = 0; k < d; ++k) {
                    String key = atts[k];

                    if (hash) {
                        Hashtable<String,Integer> coords = mappedCoords.get(k);
                        Integer seen = coords.get(key);
                        if (seen == null) {
                            //if it wasn't mapped yet
                            if (v) System.out.println("New Key: " + key);
                            coords.put(key, 1);
                        } else {
                            //increase count of this key
                            coords.put(key, seen + 1);
                        }
                    }

                    if (norm) {
                        Map<String,Integer> norms = mappedNorms.get(k);
                        List<Integer> counts = data.get(k);
                        Integer assigned = norms.get(key);
                        if (assigned == null) {
                            //if it wasn't mapped yet
                            if (v) System.out.println("New Key: " + key);
                            norms.put(key, nextNorm[k]);
                            //store the relationship permanently
                            permanentOut.println("dim: " + k + "\t" + key + "\t" + nextNorm[k]);
                            //first occurrence: the slot index equals the normalized value
                            counts.add(1);
                            nextNorm[k]++;
                        } else {
                            //increase count of this key in the appropriate slot
                            int slot = assigned;
                            counts.set(slot, counts.get(slot) + 1);
                        }
                    }
                }
            }
        } catch (IOException ie) {
            ie.printStackTrace();
        }

        if (v && hash) {
            for (int i = 0; i < d; ++i) {
                System.out.println(mappedCoords.get(i));
            }
        }
    }

    /**
     * Writes the (attribute, count) tables to {@code fname + "_COUNT"} and
     * {@code fname + "_COUNT.full"} for future processing. Both files receive the
     * same content: the number of dimensions, one "index size" line per dimension,
     * then one "attribute TAB count" line per attribute, dimension by dimension.
     *
     * @param fname name of the fact table; the output names are derived from it
     * @throws IllegalStateException if createHashTables has not been called
     */
    public void writeHashTables(String fname) {
        requireTables();
        try (PrintWriter out = new PrintWriter(
                     new BufferedWriter(new FileWriter(fname + "_COUNT")));
             PrintWriter fullOut = new PrintWriter(
                     new BufferedWriter(new FileWriter(fname + "_COUNT.full")))) {
            writeCounts(out);
            writeCounts(fullOut);
        } catch (IOException ie) {
            System.out.println("inside writeHashTables");
            ie.printStackTrace();
        }
    }

    /** Writes the metadata and the (attribute, count) pairs of every dimension to {@code out}. */
    private void writeCounts(PrintWriter out) {
        //number of dimensions
        out.print(d + "\n");

        //dimension sizes
        for (int index = 0; index < d; ++index) {
            out.print(index + " " + mappedCoords.get(index).size() + "\n");
        }

        //attributes and their counts; iterating the entries keeps key and value paired
        for (int index = 0; index < d; ++index) {
            for (Map.Entry<String,Integer> entry : mappedCoords.get(index).entrySet()) {
                out.print(entry.getKey() + "\t");
                out.print(entry.getValue());
                out.println();
            }
        }
    }

    /**
     * Rewrites the fact table with every attribute replaced by its normalized
     * integer. Two files are produced: {@code writeFile}, a binary file of packed
     * ints with no separators, and {@code <readFile-without-extension>_normalized},
     * a text file with one comma-terminated row per fact. Empty lines are skipped.
     *
     * @param readFile  name of the csv fact table to read
     * @param writeFile name of the binary file to write
     * @throws IllegalStateException    if createHashTables has not been called or an
     *                                  attribute has no normalized value
     * @throws IllegalArgumentException if a row has fewer fields than dimensions
     */
    public void writeNormFile(String readFile, String writeFile) {
        requireTables();
        System.out.println("writing normalized file.....");
        String normStringFileName = stripExtension(readFile) + "_normalized";

        try (DataOutputStream binaryOut = new DataOutputStream(
                     new BufferedOutputStream(new FileOutputStream(writeFile)));
             PrintWriter textOut = new PrintWriter(
                     new BufferedWriter(new FileWriter(normStringFileName)));
             BufferedReader in = new BufferedReader(
                     new InputStreamReader(new FileInputStream(readFile)))) {

            int lineNumber = 0;
            String line;
            while ((line = in.readLine()) != null) {
                lineNumber++;
                if (line.isEmpty()) {
                    continue;   //blank lines carry no fact
                }
                String[] fields = splitFields(line, lineNumber);
                for (int i = 0; i < d; ++i) {
                    Integer normValue = mappedNorms.get(i).get(fields[i]);
                    if (normValue == null) {
                        throw new IllegalStateException("No normalized value for '" + fields[i]
                                                        + "' in dimension " + i + " (line " + lineNumber + ")");
                    }
                    //writing integers with no commas or linebreaks
                    binaryOut.writeInt(normValue);
                    textOut.print(normValue + ",");
                }
                textOut.println();
            }
        } catch (IOException ie) {
            ie.printStackTrace();
        }
    }

}

