/** DBCount_binary.java Processes data and generates a count diamond. No measures. Copyright (C) 2008 Hazel Webb hazel.webb@unb.ca This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . **/ /** * DBCount_binary.java * input: 1) preprocessed binary file (output of DBCountPreprocessor.java) * format of input file: * number of dims, cardinalities for each dim, data - all integers * 2) k: number of carats * May 7, 2008 * Hazel Webb * January 19 2009 comments updated and I/O made consistent with Kumar.java ********************************************************************************/ import java.util.*; import java.io.*; public class DBCount_binary { private int d; //number of data dims private int K; //number of carats private int iters = 0; //count number of times the file is processed until convergence private boolean v = false; // for noisy output private boolean m = false; //for collecting metadata private String file; //to store input file name from command line int[][] data; //ragged array of arrays storing counts of the attrvals private long cellcounter; private int FILE_NAME_LENGTH; public static void main(String[] args) { DBCount_binary diamond = new DBCount_binary(); Hashtable hashFlags = new Hashtable(); //hardcode maps from flag strings to integers hashFlags.put("-filename",new Integer(0)); hashFlags.put("-f",new Integer(0)); hashFlags.put("-k",new Integer(1)); hashFlags.put("-h", new Integer(3)); hashFlags.put("-v", new Integer(2)); hashFlags.put("-m", new Integer(4)); if (args.length < 2) { System.out.println("Usage: DBCount -f filename -k carats [-v -m ]"); System.exit(1); } int iarg; for (int a=0; a 0) { data[j][attrs[j]]--; //decrement the count if (data[j][attrs[j]] < K) data[j][attrs[j]] = 0; //this should be deleted on next iter } } } if (del) break; //only delete this row once } } if (!del) { for (int v: attrs) { data_out.writeInt(v); //write this row to new diamond file cellcounter++; } } } } catch (EOFException eof) { //no need to see this System.out.println("end of file"); } if (m) { //end iteration timing iterEndTime = System.currentTimeMillis(); double iterTime = (iterEndTime - iterStartTime)/1000.0; System.out.printf("Time for processing iter %d with %d cells is %f\n", iters, cellcounter/d,iterTime); } data_in.close (); data_out.close(); if (v) System.out.println("read " + linesread + " lines from file"); if (v) System.out.println("wrote " + cellcounter + " lines to file"); if (v) System.out.println("Traces_05: del "+ del); if (reprocess) { if (inFile.charAt(FILE_NAME_LENGTH)=='_') (new File(inFile)).delete(); //and delete the old file except original data inFile = inFile.substring(0,FILE_NAME_LENGTH)+"_"+iters +"B"; //force a new file to be output each time if (v) System.out.println(inFile); process(outFile,inFile); //call process again with newly minted data file } } catch (IOException e) { System.out.println ( "IO Exception =: " + e ); } } public void writeDiamond(String fname) { PrintWriter writer; DataInputStream reader; try { writer = new PrintWriter(fname); reader = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(fname.substring(0,FILE_NAME_LENGTH)+"_"+(iters-1)+"B")))); //try to open the final binary diamond int[] attrs = new int[d];//somewhere to store each line of fact table // int x = reader.available(); try { while (true) { // x > 0) { // int offset = 0; for (int i = 0; i < d; ++i) { attrs[i] = reader.readInt(); //read in a line of the fact table // x-=4; //another integer read from file } for (int i: attrs) writer.print(i+" "); writer.println(); } } catch (EOFException eof) { //no need to see this System.out.println("end of file"); } reader.close(); writer.close(); //mv the most recently generated file to one with a consistent name so we can use it in a binary search for kappa Runtime rt = Runtime.getRuntime(); fname = fname.substring(0,FILE_NAME_LENGTH)+"_"+(iters-1)+"B"; String cmd = ""; if (fname.contains("intermediate")) cmd = "mv "+fname+" " + fname.substring(0,FILE_NAME_LENGTH-13) + "_intermediate.norm" ; else cmd = "mv "+fname+" " + fname.substring(0,FILE_NAME_LENGTH) + "_intermediate.norm" ; if (v) System.out.println("executing: " + cmd); Process p = rt.exec(cmd); } catch (Exception anyoldthing) { System.out.println(anyoldthing.getMessage()); } } /** save the count information in human readable format**/ public void writeDiamondCounts(String fname) { PrintWriter writer; try { writer = new PrintWriter(new BufferedWriter(new FileWriter(fname))); for (int index = 0; index < d; ++index) { writer.println("\n********* dimension "+index); int size = 0; for ( int j = 0; j < data[index].length; ++j) if (data[index][j] !=0) { size++; writer.println(j + "\t" + data[index][j]); } writer.println("******** size of dimension " + index + ": "+size +" ******************"); } writer.close(); } catch (IOException ie) { System.out.println("inside writeDims"); ie.printStackTrace(); } } private boolean isValidDiamond() { System.out.println("number of cells: " + cellcounter/d); for ( int i= 0; i < d; ++i) { int sum = 0; for (int j = 0; j < data[i].length; ++j) { sum = sum + data[i][j]; } if (sum != cellcounter/d) return false; } return true; } /** writeArrays @param fname: String filename write current counts to file 'fname.ext_array' output file can then be used as input to DBCount_binary.java when doing a binary search for kappa format of output file is packed integers numberOfDimensions dim0Size dim1Size ...dimnSize attr counts for dim0; attr counts for dim1 .... attr counts for dimn **/ public void writeArrays(String fname) { if (fname.contains("intermediate")) fname = fname.substring(0,FILE_NAME_LENGTH-13) +"_intermediate_array"; else fname = fname +"_intermediate_array"; File file = null; try { file = new File(fname); FileOutputStream file_output = new FileOutputStream(file); // Wrap the FileOutputStream with a DataOutputStream DataOutputStream data_out = new DataOutputStream(file_output); // Write the data to the file data_out.writeInt(d); //number dims for (int i = 0; i < d; ++i) data_out.writeInt(data[i].length); //dim sizes for (int i=0; i < d; i++) { for (int j = 0; j