Query 1 init

This commit is contained in:
Max O'Cull 2019-04-20 20:23:11 -04:00
parent 3d940d00e6
commit cf8fef4b3a
12 changed files with 166 additions and 0 deletions

49
p4/.classpath Normal file
View File

@ -0,0 +1,49 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" path="target/generated-sources/annotations">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="ignore_optional_problems" value="true"/>
<attribute name="m2e-apt" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="target/generated-test-sources/test-annotations">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="ignore_optional_problems" value="true"/>
<attribute name="m2e-apt" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>

23
p4/.project Normal file
View File

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>p4</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>

View File

@ -0,0 +1,2 @@
eclipse.preferences.version=1
org.eclipse.jdt.apt.aptEnabled=false

View File

@ -0,0 +1,9 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
org.eclipse.jdt.core.compiler.processAnnotations=disabled
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=1.8

View File

@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

15
p4/Makefile Normal file
View File

@ -0,0 +1,15 @@
# Build and run helpers for the CS 448 Project 4 Spark application.
#
# None of these targets produces a file named after itself, so all of them
# are declared phony. Note the colon: ".PHONY=..." would merely assign a
# variable called .PHONY and declare nothing.
.PHONY: all clean build submit warmup

# Default target: package the jar, then submit it to Spark.
all: build submit

# Run Maven's clean goal.
clean:
	mvn clean

# Run Maven's package goal (presumably what produces the jar under target/
# that submit and warmup reference).
build:
	mvn package

# Submit the packaged jar to Spark with cs448.App as the entry class,
# passing the input location via -i.
submit:
	spark-submit --class cs448.App target/p4-1.0-SNAPSHOT.jar -i "/user/mocull/input"

# Same invocation as submit, with the application's -warmup flag appended.
warmup:
	spark-submit --class cs448.App target/p4-1.0-SNAPSHOT.jar -i "/user/mocull/input" -warmup

View File

@ -1,5 +1,25 @@
package cs448;
import org.apache.commons.cli.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
import scala.Tuple4;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
public class Project4 {
public void runSparkApp1(App.Conf conf){
System.out.println("Running Your First Spark App!");
@ -8,10 +28,37 @@ public class Project4 {
*/
// Create a Spark Session.
SparkSession spark = SparkSession.builder().appName("CS 448 Project 4 -- Query 1").getOrCreate();
// Write data processing code here
String dataFiles[] = {conf.usersFName, conf.moviesFName, conf.ratingsFName};
Dataset<String> data;
//// Reading, Parsing and counting lines for each of the data files
JavaRDD<User> userRDD = spark.read().textFile(CS448Utils.resolveUri(conf.inPath,conf.usersFName)).cache()
.javaRDD()
.map(User::parseUser);
Dataset<Row> userDF = spark.createDataFrame(userRDD, User.class);
userDF.createOrReplaceTempView("User");
JavaRDD<Movie> movieRDD = spark.read().textFile(CS448Utils.resolveUri(conf.inPath,conf.moviesFName)).cache()
.javaRDD()
.map(Movie::parseMovie);
Dataset<Row> movieDF = spark.createDataFrame(movieRDD, Movie.class);
movieDF.createOrReplaceTempView("Movie");
JavaRDD<Rating> ratingRDD = spark.read().textFile(CS448Utils.resolveUri(conf.inPath,conf.ratingsFName)).cache()
.javaRDD()
.map(Rating::parseRating);
Dataset<Row> ratingDF = spark.createDataFrame(ratingRDD, Rating.class);
ratingDF.createOrReplaceTempView("Rating");
Dataset<Row> resultDF = spark.sql("SELECT DISTINCT m.title FROM Movie m, Rating r, User u WHERE m.movieId = r.movieId AND r.userId = u.userId AND u.occupation = " +
conf.q1Occupation + " AND r.rating >= " + conf.q1Rating);
resultDF.show();
//Don't forget to stop spark session
spark.stop();
}
public void runSparkApp2(App.Conf conf){

View File

@ -0,0 +1,4 @@
#Created by Apache Maven 3.6.0
version=1.0-SNAPSHOT
groupId=cs448
artifactId=p4

View File

@ -0,0 +1,7 @@
cs448/User.class
cs448/Rating.class
cs448/App$Conf.class
cs448/App.class
cs448/Project4.class
cs448/CS448Utils.class
cs448/Movie.class

View File

@ -0,0 +1,6 @@
/home/max/src/cs448/p4/src/main/java/cs448/Rating.java
/home/max/src/cs448/p4/src/main/java/cs448/Movie.java
/home/max/src/cs448/p4/src/main/java/cs448/CS448Utils.java
/home/max/src/cs448/p4/src/main/java/cs448/User.java
/home/max/src/cs448/p4/src/main/java/cs448/App.java
/home/max/src/cs448/p4/src/main/java/cs448/Project4.java

Binary file not shown.