/*************************************** DBSQL4.java implementation of almost algo 2 in sql using jdbc for convenience Only supplementary data structure a stack to be deleted Sets a delete flag to true when cell is deleted from cube Generic implementation: input: filename: csv file with format attr0,attr1...attrn with no headers or metadata k: carats to process for i: data are integers s: data are strings c: connect string - format: jdbc:mysql//hostname/database u: mysql user name, defaults to hazel if not set p: mysql password, defaults to diamond if not set cells: number of cells in the cube. Needed as we will rebuild when half the cells have been deleted output: database table (filename2) k-diamond database table (filename) original data written by Hazel February 8, 2009 tried to improve query performance April 30, 2009 updated to handle delete flag instead of deleting whole cell October 28, 2009 rebuilds when 50% 25% 12% 6% 3% 1% cells are marked as deleted October 30 2009 **************************************/ import java.sql.*; import java.util.*; public class DBSQL4 { private static Statement stmt; private static Statement countStmt; private static Statement deleteStmt; private static ResultSet rs; //use stacks to track attrs to be deleted private static String table; private static String table_base; private static int k; //carats private static boolean v = false;//for verbose execution private static int d; //number of dimensions private static boolean integers = false; private static boolean strings = false; private static Connection con; private static String connectString = "jdbc:mysql://localhost/YOUR-DATABASE-NAME"; private static String user = "XXX";//use your own information here private static String pwd = "XXX";//use your own information here private static int cells = 0;//number of cells in cube private static int itersCompleted = 0;//counts iterations ready for rebuild private static final double FACTOR = 0.75; //int DIVISOR = 2; private static int cellsDeleted = 0; //default connection can be overridden by cmd line connection string, user, password public static void main(String[] args) { DBSQL4 proc = new DBSQL4(); Hashtable hashFlags = new Hashtable(); //hardcode maps from flag strings to integers hashFlags.put("-filename", new Integer(0)); hashFlags.put("-f", new Integer(0)); hashFlags.put("-k", new Integer(1)); hashFlags.put("-v", new Integer(2)); hashFlags.put("-h", new Integer(3)); hashFlags.put("-i", new Integer(4)); hashFlags.put("-s", new Integer(5)); hashFlags.put("-c", new Integer(6)); hashFlags.put("-u", new Integer(7)); hashFlags.put("-p", new Integer(8)); hashFlags.put("-cells", new Integer(9)); if (args.length < 5) { System.out.println("Usage: DBsqlCount -f filename -k carats -i|-s [-iters rebuild after iters iterations -h help -v verbose -c connect string jdbc:mysql://hostname/database -u user -p password]"); System.exit(1); } int iarg; for (int a=0; a 0 && cellsDeleted > 0 && cellsDeleted > cells*FACTOR) { if (table.charAt(table.length()-1) == '2') table_base = table.substring(0,table.length()-1); else table_base = table +"2"; //rebuild the table and use table2 from now on to process deleteStmt.executeUpdate("DELETE FROM " + table_base); //clear out any old data then insert the next iteration deleteStmt.executeUpdate("INSERT into " + table_base +" SELECT * from " + table +" WHERE flag = false"); System.out.println("rebuilt table when " + cellsDeleted +" cells were deleted"); table = table_base; cells = cells - cellsDeleted;//rebuild the table as often as necessary swapping between table and table2 cellsDeleted = 0;//reset the counter } //ensure we have no leftover data in temp stmt.executeUpdate("DELETE FROM temp"); //get attribute counts and store in temp int rows = countStmt.executeUpdate("INSERT INTO temp SELECT "+ col + " as col, count(" + col + ") as cnt FROM "+ table +" WHERE flag = false GROUP BY "+ col); if (v) System.out.println("INSERT INTO temp SELECT "+ col + " as col, count(" + col + ") as cnt FROM "+ table +" WHERE flag = false GROUP BY "+ col); if (rows > 0) { //delete attributes with count < k from table int del= deleteStmt.executeUpdate("UPDATE " + table +",temp SET flag = true WHERE temp.attr0 = "+table+"."+col+ " AND temp.attr1 < " +k); if (del > 0) { deletedSome= true; cellsDeleted = cellsDeleted + del; System.out.println("cellsDeleted and del " + cellsDeleted +" \t" + del); } } if (v) System.out.println("UPDATE " + table +", temp SET flag = true WHERE temp.attr0 = "+table+"."+col+ " AND temp.attr1 < " +k); } catch (SQLException sqle) { System.out.println("SQL Error in countAndDelete"); sqle.printStackTrace(); } catch (Exception err) { System.out.println("Something different went wrong"); err.printStackTrace(); } return deletedSome; } }