/** DBCount_binary.java
Processes data and generates a count diamond. No measures.
Copyright (C) 2008 Hazel Webb hazel.webb@unb.ca
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**/
/**
* DBCount_binary.java
* input: 1) preprocessed binary file (output of DBCountPreprocessor.java)
* format of input file:
* number of dims, cardinalities for each dim, data - all integers
* 2) k: number of carats
* May 7, 2008
* Hazel Webb
* January 19 2009 comments updated and I/O made consistent with Kumar.java
********************************************************************************/
import java.util.*;
import java.io.*;
public class DBCount_binary {
// Number of data dimensions; also the number of integers that make up one row of the fact table.
private int d;
//number of data dims
// Number of carats (the k threshold): a count that drops below K is reset to 0.
private int K;
//number of carats
// Number of passes made over the data so far; also embedded in the names of the
// intermediate binary files ("<prefix>_<iters>B").
private int iters = 0;
//count number of times the file is processed until convergence
// Verbose flag: when true, progress messages are printed to standard output.
private boolean v = false;
// for noisy output
// Metadata flag: when true, per-iteration timing is printed.
private boolean m = false;
//for collecting metadata
private String file;
//to store input file name from command line
// data[i][j] holds the current count for attribute value j of dimension i;
// rows have different lengths because dimensions have different cardinalities.
int[][] data;
//ragged array of arrays storing counts of the attrvals
// Incremented once per integer written to the output file, so cellcounter/d is
// the number of rows (cells) written.
private long cellcounter;
// Length of the file-name prefix used with substring() to build the names of derived files.
// NOTE(review): presumably the length of the original input file name; it is assigned
// outside the visible part of the file -- confirm.
private int FILE_NAME_LENGTH;
public static void main(String[] args) {
DBCount_binary diamond = new DBCount_binary();
Hashtable hashFlags = new Hashtable();
//hardcode maps from flag strings to integers
hashFlags.put("-filename",new Integer(0));
hashFlags.put("-f",new Integer(0));
hashFlags.put("-k",new Integer(1));
hashFlags.put("-h", new Integer(3));
hashFlags.put("-v", new Integer(2));
hashFlags.put("-m", new Integer(4));
if (args.length < 2) {
System.out.println("Usage: DBCount -f filename -k carats [-v -m ]");
System.exit(1);
}
int iarg;
for (int a=0; a 0) {
data[j][attrs[j]]--;
//decrement the count
if (data[j][attrs[j]] < K)
data[j][attrs[j]] = 0;
//this should be deleted on next iter
}
}
}
if (del) break;
//only delete this row once
}
}
if (!del) {
for (int v: attrs) {
data_out.writeInt(v);
//write this row to new diamond file
cellcounter++;
}
}
}
} catch (EOFException eof) {
//no need to see this System.out.println("end of file");
}
if (m) {
//end iteration timing
iterEndTime = System.currentTimeMillis();
double iterTime = (iterEndTime - iterStartTime)/1000.0;
System.out.printf("Time for processing iter %d with %d cells is %f\n", iters, cellcounter/d,iterTime);
}
data_in.close ();
data_out.close();
if (v) System.out.println("read " + linesread + " lines from file");
if (v) System.out.println("wrote " + cellcounter + " lines to file");
if (v) System.out.println("Traces_05: del "+ del);
if (reprocess) {
if (inFile.charAt(FILE_NAME_LENGTH)=='_')
(new File(inFile)).delete();
//and delete the old file except original data
inFile = inFile.substring(0,FILE_NAME_LENGTH)+"_"+iters +"B";
//force a new file to be output each time
if (v) System.out.println(inFile);
process(outFile,inFile);
//call process again with newly minted data file
}
}
catch (IOException e) {
System.out.println ( "IO Exception =: " + e );
}
}
/**
 * Writes the final binary diamond as text and renames the binary file.
 *
 * The binary file is named "prefix_NB", where prefix is the first
 * FILE_NAME_LENGTH characters of fname and N is iters-1. It is read as rows
 * of d integers; each row is printed to fname as space-separated values, one
 * row per line. A trailing partial row is discarded.
 *
 * Afterwards the binary file is renamed to "base_intermediate.norm" so that
 * the most recent diamond always has the same name. base is the prefix, or
 * the prefix shortened by 13 characters when the binary file name already
 * contains "intermediate". An existing target file is replaced.
 *
 * Failures are reported on standard output; no exception is propagated.
 *
 * @param fname name of the text file to write; its first FILE_NAME_LENGTH
 *              characters are the prefix of the binary file to read
 */
public void writeDiamond(String fname) {
    try {
        String binaryName = fname.substring(0, FILE_NAME_LENGTH) + "_" + (iters - 1) + "B";
        File binaryFile = new File(binaryName);
        DataInputStream reader = null;
        PrintWriter writer = null;
        try {
            // Open the input first so a missing diamond does not truncate an existing fname.
            reader = new DataInputStream(new BufferedInputStream(new FileInputStream(binaryFile)));
            writer = new PrintWriter(fname);
            int[] attrs = new int[d]; // one row of the fact table
            try {
                // With d == 0 nothing would ever be read, so EOF could never end the loop.
                while (d > 0) {
                    for (int i = 0; i < d; ++i) {
                        attrs[i] = reader.readInt();
                    }
                    for (int value : attrs) {
                        writer.print(value + " ");
                    }
                    writer.println();
                }
            } catch (EOFException endOfData) {
                // Expected: readInt() signals the end of the binary file this way.
            }
        } finally {
            // Both streams must be closed before the rename below, and on every failure path.
            if (writer != null) {
                writer.close();
            }
            if (reader != null) {
                reader.close();
            }
        }
        // PrintWriter never throws on write errors; it only records them.
        if (writer.checkError()) {
            System.out.println("warning: errors occurred while writing " + fname);
        }

        // Give the most recent binary diamond a consistent name so it can be
        // used in a binary search for kappa.
        String targetName;
        if (binaryName.contains("intermediate")) {
            targetName = binaryName.substring(0, FILE_NAME_LENGTH - 13) + "_intermediate.norm";
        } else {
            targetName = binaryName.substring(0, FILE_NAME_LENGTH) + "_intermediate.norm";
        }
        if (v) {
            System.out.println("moving: " + binaryName + " -> " + targetName);
        }
        // Rename in-process instead of spawning "mv": the move is complete when this
        // method returns, failures are detected, and odd characters in names are safe.
        File target = new File(targetName);
        if (target.exists() && !target.delete()) {
            throw new IOException("cannot replace existing file " + targetName);
        }
        if (!binaryFile.renameTo(target)) {
            throw new IOException("cannot rename " + binaryName + " to " + targetName);
        }
    } catch (Exception e) {
        // Print the exception itself: getMessage() alone can be null.
        System.out.println("writeDiamond failed: " + e);
    }
}
/**
 * Saves the count information in human readable format.
 *
 * For every dimension a header line is written, followed by one
 * "attributeValue TAB count" line for each attribute value whose count is
 * non-zero, and a footer line giving how many such values there were.
 *
 * @param fname name of the text file to create
 */
public void writeDiamondCounts(String fname) {
    try {
        PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(fname)));
        int dim = 0;
        while (dim < d) {
            out.println("\n********* dimension " + dim);
            int nonZero = 0;
            for (int value = 0; value < data[dim].length; ++value) {
                int count = data[dim][value];
                if (count == 0) {
                    continue; // only attribute values still present are listed
                }
                nonZero++;
                out.println(value + "\t" + count);
            }
            out.println("******** size of dimension " + dim + ": " + nonZero + " ******************");
            dim++;
        }
        out.close();
    } catch (IOException ie) {
        System.out.println("inside writeDims");
        ie.printStackTrace();
    }
}
/**
 * Checks that the stored counts agree with the number of cells.
 *
 * The number of cells is taken as cellcounter / d (integer division) and is
 * printed to standard output. The counts of every dimension must add up to
 * exactly that number.
 *
 * @return true if the counts of each dimension sum to the number of cells,
 *         false as soon as one dimension does not
 */
private boolean isValidDiamond() {
    long cells = cellcounter / d;
    System.out.println("number of cells: " + cells);
    for (int i = 0; i < d; ++i) {
        // Accumulate in a long: the total is compared with a long and an int
        // accumulator would silently wrap around for very large diamonds.
        long sum = 0;
        for (int count : data[i]) {
            sum += count;
        }
        if (sum != cells) {
            return false;
        }
    }
    return true;
}
/**
writeArrays
@param fname: String filename
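write current counts to file 'fname_intermediate_array'; if fname contains "intermediate",
the first FILE_NAME_LENGTH-13 characters of fname are used in place of fname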
output file can then be used as input to DBCount_binary.java when doing a binary search for kappa
format of output file is packed integers
numberOfDimensions dim0Size dim1Size ...dimnSize
attr counts for dim0; attr counts for dim1 .... attr counts for dimn
**/
public void writeArrays(String fname) {
if (fname.contains("intermediate"))
fname = fname.substring(0,FILE_NAME_LENGTH-13) +"_intermediate_array";
else
fname = fname +"_intermediate_array";
File file = null;
try {
file = new File(fname);
FileOutputStream file_output = new FileOutputStream(file);
// Wrap the FileOutputStream with a DataOutputStream
DataOutputStream data_out = new DataOutputStream(file_output);
// Write the data to the file
data_out.writeInt(d);
//number dims
for (int i = 0; i < d; ++i)
data_out.writeInt(data[i].length);
//dim sizes
for (int i=0; i < d; i++) {
for (int j = 0; j