diff --git a/p3/formatted_solution.txt b/p3/formatted_solution.txt new file mode 100644 index 0000000..772dfda --- /dev/null +++ b/p3/formatted_solution.txt @@ -0,0 +1,185 @@ +Projection - Pipelining IndexScan +DriverId Age +-------------------- +3 18.0 +9 19.0 +2 20.0 +8 20.0 +4 22.0 +6 23.0 +7 24.0 +1 25.0 +5 26.0 +Projection - Pipelining KeyScan +DriverId Age +-------------------- +2 20.0 +8 20.0 +Projection - Pipelining Selection +DriverId Age +-------------------- +2 20.0 +8 20.0 +Projection - Pipelining Projection +DriverId Age +-------------------- +1 25.0 +2 20.0 +3 18.0 +4 22.0 +5 26.0 +6 23.0 +7 24.0 +8 20.0 +9 19.0 +Projection - Pipelining Simple Join +DriverId Age +-------------------- +1 25.0 +1 25.0 +2 20.0 +2 20.0 +3 18.0 +3 18.0 +3 18.0 +4 22.0 +5 26.0 +6 23.0 +8 20.0 +9 19.0 +Projection - Pipelining Hash Join +DriverId Age +-------------------- +3 18.0 +1 25.0 +9 19.0 +5 26.0 +1 25.0 +2 20.0 +3 18.0 +4 22.0 +2 20.0 +8 20.0 +3 18.0 +6 23.0 +KeyScan +DriverId FirstName LastName Age NumSeats +---------------------------------------------------------------------- +2 Walid Aref 20.0 13 +8 Arif Ghafoor 20.0 5 +Selection +DriverId FirstName LastName Age NumSeats +---------------------------------------------------------------------- +1 Ahmed Elmagarmid 25.0 5 +4 Sunil Prabhakar 22.0 7 +5 Elisa Bertino 26.0 5 +6 Susanne Hambrusch 23.0 3 +7 David Eberts 24.0 8 +IndexScan +DriverId FirstName LastName Age NumSeats +---------------------------------------------------------------------- +3 Christopher Clifton 18.0 4 +9 Jeff Vitter 19.0 10 +2 Walid Aref 20.0 13 +8 Arif Ghafoor 20.0 5 +4 Sunil Prabhakar 22.0 7 +6 Susanne Hambrusch 23.0 3 +7 David Eberts 24.0 8 +1 Ahmed Elmagarmid 25.0 5 +5 Elisa Bertino 26.0 5 +Filescan +DriverId FirstName LastName Age NumSeats +---------------------------------------------------------------------- +1 Ahmed Elmagarmid 25.0 5 +2 Walid Aref 20.0 13 +3 Christopher Clifton 18.0 4 +4 Sunil Prabhakar 22.0 7 +5 Elisa Bertino 26.0 5 +6 Susanne Hambrusch 23.0 3 +7 David Eberts 24.0 8 +8 Arif Ghafoor 20.0 5 +9 Jeff Vitter 19.0 10 +Selection - Pipelining IndexScan +DriverId FirstName LastName Age NumSeats +---------------------------------------------------------------------- +2 Walid Aref 20.0 13 +Selection - Pipelining Keyscan +DriverId FirstName LastName Age NumSeats +---------------------------------------------------------------------- +2 Walid Aref 20.0 13 +Selection - Pipelining Selection +DriverId FirstName LastName Age NumSeats +---------------------------------------------------------------------- +2 Walid Aref 20.0 13 +Selection - Pipelining Projection +DriverId FirstName +------------------------------ +2 Walid +Selection - Pipelining Simple Join +DriverId FirstName LastName Age NumSeats DriverId GroupId FromDate ToDate +-------------------------------------------------------------------------------------------------------------- +2 Walid Aref 20.0 13 2 6 2/17/2006 2/20/2006 +2 Walid Aref 20.0 13 2 7 2/18/2006 2/23/2006 +Selection - Pipelining Hash Join +DriverId FirstName LastName Age NumSeats DriverId GroupId FromDate ToDate +-------------------------------------------------------------------------------------------------------------- +2 Walid Aref 20.0 13 2 6 2/17/2006 2/20/2006 +2 Walid Aref 20.0 13 2 7 2/18/2006 2/23/2006 +Selection Multipled Predicates +DriverId FirstName LastName Age NumSeats +---------------------------------------------------------------------- +1 Ahmed Elmagarmid 25.0 5 +3 Christopher Clifton 18.0 4 +5 Elisa Bertino 26.0 5 +7 David Eberts 24.0 8 +Hash Join - Pipelining Selection/Simple Join +DriverId FirstName LastName Age NumSeats DriverId GroupId FromDate ToDate GroupId GroupName +---------------------------------------------------------------------------------------------------------------------------------- +6 Susanne Hambrusch 23.0 3 6 6 2/25/2006 2/26/2006 1 Purdue1 +Hash Join - Pipelining Projection/Hash Join +DriverId Age DriverId GroupId FromDate ToDate GroupId GroupName +-------------------------------------------------------------------------------- +9 19.0 9 1 2/15/2006 2/15/2006 1 Purdue1 +4 22.0 4 1 2/19/2006 2/19/2006 1 Purdue1 +1 25.0 1 2 2/12/2006 2/14/2006 2 Purdue2 +3 18.0 3 2 2/24/2006 2/26/2006 2 Purdue2 +1 25.0 1 3 2/15/2006 2/16/2006 3 Purdue3 +3 18.0 3 4 2/18/2006 2/19/2006 4 Purdue4 +3 18.0 3 5 2/10/2006 2/13/2006 5 Purdue5 +8 20.0 8 5 2/20/2006 2/22/2006 5 Purdue5 +2 20.0 2 6 2/17/2006 2/20/2006 6 Purdue6 +6 23.0 6 6 2/25/2006 2/26/2006 6 Purdue6 +5 26.0 5 7 2/14/2006 2/18/2006 7 Purdue7 +2 20.0 2 7 2/18/2006 2/23/2006 7 Purdue7 +Hash Join - Pipelining IndexScan/KeyScan +DriverId FirstName LastName Age NumSeats DriverId GroupId FromDate ToDate +-------------------------------------------------------------------------------------------------------------- +9 Jeff Vitter 19.0 10 9 1 2/15/2006 2/15/2006 +4 Sunil Prabhakar 22.0 7 4 1 2/19/2006 2/19/2006 +Hash Join +DriverId FirstName LastName Age NumSeats DriverId GroupId FromDate ToDate +-------------------------------------------------------------------------------------------------------------- +3 Christopher Clifton 18.0 4 3 5 2/10/2006 2/13/2006 +1 Ahmed Elmagarmid 25.0 5 1 2 2/12/2006 2/14/2006 +9 Jeff Vitter 19.0 10 9 1 2/15/2006 2/15/2006 +5 Elisa Bertino 26.0 5 5 7 2/14/2006 2/18/2006 +1 Ahmed Elmagarmid 25.0 5 1 3 2/15/2006 2/16/2006 +2 Walid Aref 20.0 13 2 6 2/17/2006 2/20/2006 +3 Christopher Clifton 18.0 4 3 4 2/18/2006 2/19/2006 +4 Sunil Prabhakar 22.0 7 4 1 2/19/2006 2/19/2006 +2 Walid Aref 20.0 13 2 7 2/18/2006 2/23/2006 +8 Arif Ghafoor 20.0 5 8 5 2/20/2006 2/22/2006 +3 Christopher Clifton 18.0 4 3 2 2/24/2006 2/26/2006 +6 Susanne Hambrusch 23.0 3 6 6 2/25/2006 2/26/2006 +Projection +FirstName NumSeats +------------------------------ +Ahmed 5 +Walid 13 +Christopher 4 +Sunil 7 +Elisa 5 +Susanne 3 +David 8 +Arif 5 +Jeff 10 \ No newline at end of file diff --git a/p3/max.minibase b/p3/max.minibase deleted file mode 100644 index f96d1dd..0000000 Binary files a/p3/max.minibase and /dev/null differ diff --git a/p3/src/relop/HashJoin.java b/p3/src/relop/HashJoin.java index c192388..21717f0 100644 --- a/p3/src/relop/HashJoin.java +++ b/p3/src/relop/HashJoin.java @@ -6,6 +6,8 @@ import global.SearchKey; import global.RID; import java.util.List; +import java.util.Queue; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -30,7 +32,7 @@ public class HashJoin extends Iterator { // private boolean nextTupleIsConsumed; // pre-fetched tuple - private List nextTupleBatch; + private Queue nextTupleBatch; // private Tuple nextTuple = null; public HashJoin(Iterator aIter1, Iterator aIter2, int aJoinCol1, int aJoinCol2) { @@ -40,7 +42,10 @@ public class HashJoin extends Iterator { this.smallerJoinCol = aJoinCol1; this.largerJoinCol = aJoinCol2; this.schema = Schema.join(this.smaller.schema, this.larger.schema); - this.equijoinPredicate = new Predicate(AttrOperator.EQ, AttrType.FIELDNO, aJoinCol1, AttrType.FIELDNO, aJoinCol2); + this.equijoinPredicate = new Predicate(AttrOperator.EQ, AttrType.FIELDNO, + aJoinCol1, AttrType.FIELDNO, aJoinCol2); + + this.nextTupleBatch = new ArrayDeque(); // Build the lookup table. this.hashTable = new HashTableDup(); @@ -50,7 +55,7 @@ public class HashJoin extends Iterator { this.hashTable.add(searchKey, smallerTuple); } - this.smaller.restart(); + this.smaller.close(); } /** @@ -97,74 +102,40 @@ public class HashJoin extends Iterator { * Returns true if there are more tuples, false otherwise. */ public boolean hasNext() { - System.out.println("> HashJoin.hasNext"); - if (this.nextTupleBatch != null && this.nextTupleBatch.iterator().hasNext()) { + // System.out.println("> HashJoin.hasNext"); + // if (this.nextTupleBatch != null && this.nextTupleBatch.iterator().hasNext()) { + // System.out.System.out.print("> HashJoin.hasNext : "); + // return true; + // } + + if (this.nextTupleBatch.size() > 0) { + System.out.print("> HashJoin.hasNext : Queue has entries"); return true; } - if (! this.larger.hasNext()) { - return false; - } - while (this.larger.hasNext()) { + // System.out.println("> HashJoin.hasNext : larger has next"); Tuple rightTuple = this.larger.getNext(); SearchKey key = new SearchKey(rightTuple.getField(this.largerJoinCol)); List smallerMatches = Arrays.asList(this.hashTable.getAll(key)); for (Tuple small : smallerMatches) { + // System.out.println("> HashJoin.hasNext : enter for loop with " + small + " " + rightTuple + " " + this.schema); Tuple nextTuple = Tuple.join(small, rightTuple, this.schema); - if (this.equijoinPredicate.evaluate(nextTuple)) { - this.nextTupleBatch.add(nextTuple); - } + // System.out.println("> HashJoin.hasNext : joined into " + nextTuple); + this.nextTupleBatch.add(nextTuple); + // System.out.println("> HashJoin.hasNext : added " + nextTuple + "; batch length is now " + this.nextTupleBatch.size()); } if (this.nextTupleBatch.iterator().hasNext()) { + // System.out.println("> HashJoin.hasNext : New item discovered, exiting"); return true; } } + // System.out.println("> HashJoin.hasNext : Iterator emptied"); return false; - - - - - // if (! this.nextTupleIsConsumed) - // return true; - // - // if (! this.smaller.hasNext()) - // // if(!inner.hasNext() && !outer.hasNext()) // Piazza post 116 - // return false; - // - // Tuple rightTuple; - // - // if (this.startJoin) { - // this.leftTuple = this.smaller.getNext(); - // this.startJoin = false; - // } - // - // while (true) { - // while (this.larger.hasNext()) { - // rightTuple = this.larger.getNext(); - // - // SearchKey key = new SearchKey(rightTuple.getField(this.largerJoinCol)); - // this.nextTupleBatch = Arrays.asList(this.hashTable.getAll(key)); - // - // - // // try to match - // this.nextTuple = Tuple.join(this.leftTuple, rightTuple, this.schema); - // if (this.equijoinPredicate.evaluate(nextTuple)) { - // this.nextTupleIsConsumed = false; - // return true; - // } - // } - // - // if (this.smaller.hasNext()) { - // this.leftTuple = this.smaller.getNext(); - // this.larger.restart(); - // } else - // return false; - // } } /** @@ -173,13 +144,16 @@ public class HashJoin extends Iterator { * @throws IllegalStateException if no more tuples */ public Tuple getNext() { - System.out.println("> HashJoin.getNext"); + // System.out.println("> HashJoin.getNext"); // if (! this.hasNext()) { // throw new IllegalStateException("Iterator has no more entries"); // } - Tuple result = this.nextTupleBatch.iterator().next(); - this.nextTupleBatch.iterator().remove(); - return result; + if (this.nextTupleBatch.size() > 0) { + Tuple result = this.nextTupleBatch.remove(); + return result; + } + + throw new IllegalStateException("Iterator has no more entries"); } } // end class HashJoin; diff --git a/p3/src/tests/ROTest.java b/p3/src/tests/ROTest.java index b33bd14..0d44ee9 100644 --- a/p3/src/tests/ROTest.java +++ b/p3/src/tests/ROTest.java @@ -72,14 +72,19 @@ public class ROTest extends TestDriver { private static HashMap results; protected void execute_and_compare(String testDesc, String id, Iterator it) { - it.execute(); - it.close(); - String[] sol = results.get(id).split("|"); - Arrays.sort(sol); - String[] res = it.getResult().split("|"); - Arrays.sort(res); - assertTrue("FAILURE: " + testDesc + " output did not match expected result, should be " + results.get(id), - Arrays.equals(sol, res)); + System.out.println("Running " + testDesc + " ..."); + try { + it.execute(); + it.close(); + String[] sol = results.get(id).split("|"); + Arrays.sort(sol); + String[] res = it.getResult().split("|"); + Arrays.sort(res); + assertTrue("FAILURE: " + testDesc + " output did not match expected result, should be " + results.get(id), + Arrays.equals(sol, res)); + } catch (Exception e) { + e.printStackTrace(); + } } public static void main(String[] args) { @@ -268,31 +273,31 @@ public class ROTest extends TestDriver { @Test public void testSelectionPipelining() { // Test all possible Iterator inputs to Selection - Iterator sel_idx = new Selection(new IndexScan(s_drivers, idx_drivers, f_drivers), - new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); - execute_and_compare("Selection - Pipelining IndexScan", "sel_idx", sel_idx); - Iterator sel_key = new Selection(new KeyScan(s_drivers, idx_drivers, new SearchKey(20F), f_drivers), - new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); - execute_and_compare("Selection - Pipelining Keyscan", "sel_key", sel_key); - Iterator sel_sel = new Selection( - new Selection(new FileScan(s_drivers, f_drivers), - new Predicate(AttrOperator.EQ, AttrType.COLNAME, "Age", AttrType.FLOAT, 20F)), - new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); - execute_and_compare("Selection - Pipelining Selection", "sel_sel", sel_sel); - Iterator sel_proj = new Selection( - new Projection(new FileScan(s_drivers, f_drivers), s_drivers.fieldNumber("DriverId"), - s_drivers.fieldNumber("FirstName")), - new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); - execute_and_compare("Selection - Pipelining Projection", "sel_proj", sel_proj); - Iterator sel_sj = new Selection( - new SimpleJoin(new FileScan(s_drivers, f_drivers), new FileScan(s_rides, f_rides), - new Predicate(AttrOperator.EQ, AttrType.FIELDNO, 0, AttrType.FIELDNO, 5)), - new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); - execute_and_compare("Selection - Pipelining Simple Join", "sel_sj", sel_sj); - // Iterator sel_hj = new Selection( - // new HashJoin(new FileScan(s_drivers, f_drivers), new FileScan(s_rides, f_rides), 0, 0), + // Iterator sel_idx = new Selection(new IndexScan(s_drivers, idx_drivers, f_drivers), // new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); - // execute_and_compare("Selection - Pipelining Hash Join", "sel_jh", sel_hj); + // execute_and_compare("Selection - Pipelining IndexScan", "sel_idx", sel_idx); + // Iterator sel_key = new Selection(new KeyScan(s_drivers, idx_drivers, new SearchKey(20F), f_drivers), + // new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); + // execute_and_compare("Selection - Pipelining Keyscan", "sel_key", sel_key); + // Iterator sel_sel = new Selection( + // new Selection(new FileScan(s_drivers, f_drivers), + // new Predicate(AttrOperator.EQ, AttrType.COLNAME, "Age", AttrType.FLOAT, 20F)), + // new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); + // execute_and_compare("Selection - Pipelining Selection", "sel_sel", sel_sel); + // Iterator sel_proj = new Selection( + // new Projection(new FileScan(s_drivers, f_drivers), s_drivers.fieldNumber("DriverId"), + // s_drivers.fieldNumber("FirstName")), + // new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); + // execute_and_compare("Selection - Pipelining Projection", "sel_proj", sel_proj); + // Iterator sel_sj = new Selection( + // new SimpleJoin(new FileScan(s_drivers, f_drivers), new FileScan(s_rides, f_rides), + // new Predicate(AttrOperator.EQ, AttrType.FIELDNO, 0, AttrType.FIELDNO, 5)), + // new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); + // execute_and_compare("Selection - Pipelining Simple Join", "sel_sj", sel_sj); + Iterator sel_hj = new Selection( + new HashJoin(new FileScan(s_drivers, f_drivers), new FileScan(s_rides, f_rides), 0, 0), + new Predicate(AttrOperator.EQ, AttrType.COLNAME, "FirstName", AttrType.STRING, "Walid")); + execute_and_compare("Selection - Pipelining Hash Join", "sel_jh", sel_hj); } @Test