书名：Mastering Hadoop 3
作者名：Chanchal Singh Manish Kumar
本章字数：133字
更新时间：2025-04-04 14:54:50

MovieRatingReducer

The job of the Reducer is to select the top 20 movies by rating from the combined output of all the mappers. The Reducer simply iterates through the values and keeps the top 20 movies by rating in memory. The records are written to the output file once the reducer has completed its processing, as follows:


import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
 * Reducer that retains the {@code TOP_K} highest-rated movies it receives and emits them
 * from {@link #cleanup(Context)} once every input group has been processed.
 *
 * <p>Each input key is treated as a rating and each value as a movie belonging to that
 * rating. Several movies may share one rating; all of them compete for the {@code TOP_K}
 * slots instead of overwriting one another.
 *
 * <p>NOTE(review): the retained set is per reducer instance, so a global top K presumably
 * requires the job to be configured with a single reducer; confirm against the driver.
 */
public class MovieRatingReducer extends Reducer<Text, Text, Text, Text> {
    /** Maximum number of movies retained in memory and written to the output. */
    private static final int TOP_K = 20;

    /** Retained movies grouped by rating, lowest rating first (see {@link RatingOrder}). */
    private final TreeMap<String, List<String>> topMoviesByRating = new TreeMap<>(new RatingOrder());

    /** Total number of movies currently held across all rating groups. */
    private int retainedMovies;

    /**
     * Adds every movie of the given rating group to the retained set and evicts the
     * lowest-rated entry whenever more than {@code TOP_K} movies are held.
     *
     * @param key     the rating shared by all {@code values}
     * @param values  the movies that carry this rating
     * @param context the task context; not written to here, output is deferred to cleanup
     * @throws IOException          declared by the overridden method; not thrown by this body
     * @throws InterruptedException declared by the overridden method; not thrown by this body
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

        for (Text movie : values) {
            String rating = key.toString();
            List<String> titles = topMoviesByRating.get(rating);
            if (titles == null) {
                titles = new ArrayList<>();
                topMoviesByRating.put(rating, titles);
            }
            titles.add(movie.toString());
            retainedMovies++;

            if (retainedMovies > TOP_K) {
                evictLowestRated();
            }
        }
    }

    /**
     * Writes every retained (rating, movie) pair to the output, lowest rating first.
     *
     * @param context the task context that receives the output records
     * @throws IOException          if writing a record fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<String, List<String>> ratingGroup : topMoviesByRating.entrySet()) {
            for (String title : ratingGroup.getValue()) {
                context.write(new Text(ratingGroup.getKey()), new Text(title));
            }
        }
    }

    /**
     * Drops one movie from the lowest rating group. Among movies that tie on the lowest
     * rating the most recently added one is dropped, which keeps the removal O(1).
     */
    private void evictLowestRated() {
        Map.Entry<String, List<String>> lowest = topMoviesByRating.firstEntry();
        List<String> titles = lowest.getValue();
        titles.remove(titles.size() - 1);
        if (titles.isEmpty()) {
            topMoviesByRating.remove(lowest.getKey());
        }
        retainedMovies--;
    }

    /**
     * Orders rating keys by numeric value so that, for example, "10" ranks above "9".
     *
     * <p>Keys that do not parse as numbers sort below every numeric key and are ordered
     * among themselves by plain string comparison. Numeric ties are broken by string
     * comparison so that textually different keys (such as "5" and "5.0") stay distinct.
     */
    private static final class RatingOrder implements Comparator<String> {
        @Override
        public int compare(String left, String right) {
            Double leftValue = parse(left);
            Double rightValue = parse(right);

            if (leftValue == null || rightValue == null) {
                if (leftValue == null && rightValue == null) {
                    return left.compareTo(right);
                }
                return leftValue == null ? -1 : 1;
            }

            int byValue = leftValue.compareTo(rightValue);
            return byValue != 0 ? byValue : left.compareTo(right);
        }

        /** Returns the numeric value of {@code rating}, or {@code null} when it is not a number. */
        private static Double parse(String rating) {
            try {
                return Double.valueOf(rating);
            } catch (NumberFormatException ignored) {
                // Not numeric: the caller falls back to string ordering for this key.
                return null;
            }
        }
    }
}