diff --git a/p4/.classpath b/p4/.classpath
new file mode 100644
index 0000000..39abf1c
--- /dev/null
+++ b/p4/.classpath
@@ -0,0 +1,49 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/p4/.project b/p4/.project
new file mode 100644
index 0000000..fc4eb90
--- /dev/null
+++ b/p4/.project
@@ -0,0 +1,23 @@
+
+
+ p4
+
+
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+ org.eclipse.m2e.core.maven2Builder
+
+
+
+
+
+ org.eclipse.jdt.core.javanature
+ org.eclipse.m2e.core.maven2Nature
+
+
diff --git a/p4/.settings/org.eclipse.jdt.apt.core.prefs b/p4/.settings/org.eclipse.jdt.apt.core.prefs
new file mode 100644
index 0000000..d4313d4
--- /dev/null
+++ b/p4/.settings/org.eclipse.jdt.apt.core.prefs
@@ -0,0 +1,2 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.apt.aptEnabled=false
diff --git a/p4/.settings/org.eclipse.jdt.core.prefs b/p4/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..1b6e1ef
--- /dev/null
+++ b/p4/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,9 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+org.eclipse.jdt.core.compiler.compliance=1.8
+org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
+org.eclipse.jdt.core.compiler.processAnnotations=disabled
+org.eclipse.jdt.core.compiler.release=disabled
+org.eclipse.jdt.core.compiler.source=1.8
diff --git a/p4/.settings/org.eclipse.m2e.core.prefs b/p4/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000..f897a7f
--- /dev/null
+++ b/p4/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/p4/Makefile b/p4/Makefile
new file mode 100644
index 0000000..1035657
--- /dev/null
+++ b/p4/Makefile
@@ -0,0 +1,15 @@
+.PHONY=clean build submit
+
+all: package submit
+
+clean:
+ mvn clean
+
+build:
+ mvn package
+
+submit:
+ spark-submit --class cs448.App target/p4-1.0-SNAPSHOT.jar -i "/user/mocull/input"
+
+warmup:
+ spark-submit --class cs448.App target/p4-1.0-SNAPSHOT.jar -i "/user/mocull/input" -warmup
diff --git a/p4/src/main/java/cs448/Project4.java b/p4/src/main/java/cs448/Project4.java
index 3b46cc1..04853fe 100644
--- a/p4/src/main/java/cs448/Project4.java
+++ b/p4/src/main/java/cs448/Project4.java
@@ -1,5 +1,25 @@
package cs448;
+import org.apache.commons.cli.*;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.Optional;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import scala.Tuple2;
+import scala.Tuple4;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+
public class Project4 {
public void runSparkApp1(App.Conf conf){
System.out.println("Running Your First Spark App!");
@@ -8,10 +28,37 @@ public class Project4 {
*/
// Create a Spark Session.
+ SparkSession spark = SparkSession.builder().appName("CS 448 Project 4 -- Query 1").getOrCreate();
// Write data processing code here
+ String dataFiles[] = {conf.usersFName, conf.moviesFName, conf.ratingsFName};
+ Dataset data;
+
+ //// Reading, Parsing and counting lines for each of the data files
+ JavaRDD userRDD = spark.read().textFile(CS448Utils.resolveUri(conf.inPath,conf.usersFName)).cache()
+ .javaRDD()
+ .map(User::parseUser);
+ Dataset userDF = spark.createDataFrame(userRDD, User.class);
+ userDF.createOrReplaceTempView("User");
+
+ JavaRDD movieRDD = spark.read().textFile(CS448Utils.resolveUri(conf.inPath,conf.moviesFName)).cache()
+ .javaRDD()
+ .map(Movie::parseMovie);
+ Dataset movieDF = spark.createDataFrame(movieRDD, Movie.class);
+ movieDF.createOrReplaceTempView("Movie");
+
+ JavaRDD ratingRDD = spark.read().textFile(CS448Utils.resolveUri(conf.inPath,conf.ratingsFName)).cache()
+ .javaRDD()
+ .map(Rating::parseRating);
+ Dataset ratingDF = spark.createDataFrame(ratingRDD, Rating.class);
+ ratingDF.createOrReplaceTempView("Rating");
+
+ Dataset resultDF = spark.sql("SELECT DISTINCT m.title FROM Movie m, Rating r, User u WHERE m.movieId = r.movieId AND r.userId = u.userId AND u.occupation = " +
+ conf.q1Occupation + " AND r.rating >= " + conf.q1Rating);
+ resultDF.show();
//Don't forget to stop spark session
+ spark.stop();
}
public void runSparkApp2(App.Conf conf){
diff --git a/p4/target/maven-archiver/pom.properties b/p4/target/maven-archiver/pom.properties
new file mode 100644
index 0000000..03873ea
--- /dev/null
+++ b/p4/target/maven-archiver/pom.properties
@@ -0,0 +1,4 @@
+#Created by Apache Maven 3.6.0
+version=1.0-SNAPSHOT
+groupId=cs448
+artifactId=p4
diff --git a/p4/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/p4/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
new file mode 100644
index 0000000..b95557e
--- /dev/null
+++ b/p4/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
@@ -0,0 +1,7 @@
+cs448/User.class
+cs448/Rating.class
+cs448/App$Conf.class
+cs448/App.class
+cs448/Project4.class
+cs448/CS448Utils.class
+cs448/Movie.class
diff --git a/p4/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/p4/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
new file mode 100644
index 0000000..b8af298
--- /dev/null
+++ b/p4/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
@@ -0,0 +1,6 @@
+/home/max/src/cs448/p4/src/main/java/cs448/Rating.java
+/home/max/src/cs448/p4/src/main/java/cs448/Movie.java
+/home/max/src/cs448/p4/src/main/java/cs448/CS448Utils.java
+/home/max/src/cs448/p4/src/main/java/cs448/User.java
+/home/max/src/cs448/p4/src/main/java/cs448/App.java
+/home/max/src/cs448/p4/src/main/java/cs448/Project4.java
diff --git a/p4/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst b/p4/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst
new file mode 100644
index 0000000..e69de29
diff --git a/p4/target/p4-1.0-SNAPSHOT.jar b/p4/target/p4-1.0-SNAPSHOT.jar
new file mode 100644
index 0000000..4b7e749
Binary files /dev/null and b/p4/target/p4-1.0-SNAPSHOT.jar differ