- Mastering Hadoop 3
- Chanchal Singh Manish Kumar
- 133字
- 2025-04-04 14:54:50
MovieRatingReducer
The job of the Reducer is to filter out the top 20 movies by rating from the combined output of multiple mappers. The Reducer simply iterates through the values and maintains the top 20 movies by rating in memory. The records are flushed to the file once the reducer has completed its processing, as follows:
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
 * Reducer that retains the highest-ranked movies seen across all {@code reduce}
 * calls and writes them out once, in {@link #cleanup}.
 *
 * <p>Each reduce key is treated as the rating and each value as a movie under
 * that rating (NOTE(review): this matches the surrounding description and the
 * naming in this class, but the mapper is not visible here - confirm).
 *
 * <p>Keys that parse as numbers are ordered numerically, so {@code "10"} ranks
 * above {@code "9"}. Keys that do not parse as numbers rank below every numeric
 * key and are ordered lexicographically among themselves.
 *
 * <p>Every movie under a key is kept (until evicted by the size cap); movies
 * that share a key no longer overwrite one another.
 */
public class MovieRatingReducer extends Reducer<Text, Text, Text, Text> {

    /** Maximum number of movies held in memory and emitted. */
    private static final int TOP_K = 20;

    /**
     * Orders rating keys by numeric value first; the natural-order tie-break
     * keeps textually different keys with equal value (e.g. "4" and "4.0")
     * as distinct map entries so the original key text is what gets emitted.
     */
    private static final Comparator<String> BY_RATING =
            Comparator.<String>comparingDouble(MovieRatingReducer::ratingValue)
                    .thenComparing(Comparator.naturalOrder());

    /** Movies grouped by rating key, lowest rating first. */
    private final TreeMap<String, List<String>> topMoviesByRating = new TreeMap<>(BY_RATING);

    /** Total number of movies currently held across all rating buckets. */
    private int retainedMovies = 0;

    /**
     * Records every movie listed under {@code key}, evicting the lowest-ranked
     * movie whenever more than {@link #TOP_K} are held. Nothing is written to
     * the context here; output happens in {@link #cleanup}.
     *
     * @param key     the rating shared by all {@code values}
     * @param values  the movies carrying that rating
     * @param context the task context (unused in this method)
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String rating = key.toString();
        for (Text movie : values) {
            // Look the bucket up on every iteration: an eviction below may have
            // removed this key's bucket from the map when it became empty.
            topMoviesByRating
                    .computeIfAbsent(rating, unused -> new ArrayList<>())
                    .add(movie.toString());
            retainedMovies++;
            if (retainedMovies > TOP_K) {
                evictLowest();
            }
        }
    }

    /**
     * Writes the retained movies as (rating, movie) pairs, lowest rating first.
     *
     * @param context the task context that receives the output records
     * @throws IOException          if writing a record fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<String, List<String>> bucket : topMoviesByRating.entrySet()) {
            Text rating = new Text(bucket.getKey());
            for (String movie : bucket.getValue()) {
                context.write(rating, new Text(movie));
            }
        }
    }

    /** Drops one movie from the lowest-rated bucket, removing the bucket if it empties. */
    private void evictLowest() {
        Map.Entry<String, List<String>> lowest = topMoviesByRating.firstEntry();
        List<String> movies = lowest.getValue();
        // Remove the most recently added movie so earlier arrivals win ties.
        movies.remove(movies.size() - 1);
        if (movies.isEmpty()) {
            topMoviesByRating.remove(lowest.getKey());
        }
        retainedMovies--;
    }

    /**
     * Converts a rating key to a sortable number.
     *
     * @param rating the textual rating key
     * @return the parsed value, or {@link Double#NEGATIVE_INFINITY} when the
     *         text is not a number (or parses to NaN), so such keys rank lowest
     */
    private static double ratingValue(String rating) {
        try {
            double parsed = Double.parseDouble(rating);
            return Double.isNaN(parsed) ? Double.NEGATIVE_INFINITY : parsed;
        } catch (NumberFormatException notNumeric) {
            return Double.NEGATIVE_INFINITY;
        }
    }
}