Add distinct

This commit is contained in:
Max O'Cull 2019-04-21 15:08:46 -04:00
parent ae2e4f4427
commit beb3d1cea1

View File

@@ -123,14 +123,15 @@ public class Project4 {
JavaRDD<Rating> filteredRatings = ratingRDD.filter(r -> r.getRating() == conf.q3Rating);
JavaPairRDD<Integer, Rating> filteredRatingsPair = filteredRatings.mapToPair(r -> new Tuple2(r.getUserId(), r));
JavaPairRDD<Integer, Movie> filteredMoviesPair = movieRDD.mapToPair(m -> new Tuple2(m.getMovieId(), m));
JavaPairRDD<Integer, Tuple2<User, Rating>> usersRatingsJoin = filteredUsersPair.join(filteredRatingsPair);
JavaPairRDD<Integer, Movie> filteredMoviesPair = movieRDD.mapToPair(m -> new Tuple2(m.getMovieId(), m));
// Map the previous RDD to <MovieID, Rating>
JavaPairRDD<Integer, Rating> movieMatchRatings = usersRatingsJoin.mapToPair(t -> new Tuple2(t._2()._2().getMovieId(), t._2()._2()));
JavaPairRDD<Integer, Tuple2<Rating, Movie>> moviesJoined = movieMatchRatings.join(filteredMoviesPair);
JavaRDD<String> movieIdRDD = moviesJoined.map(t -> t._1().toString());
JavaRDD<String> movieIdRDD = moviesJoined.map(t -> t._1().toString()).distinct();
movieIdRDD.collect().forEach(x -> System.out.println(x));
movieIdRDD.saveAsTextFile(CS448Utils.resolveUri(conf.outPath, "query-3"));