- Mastering Hadoop 3
- Chanchal Singh Manish Kumar
- 135字
- 2025-04-04 14:54:50
MovieRatingMapper
The job of the mapper is to process each record and emit the top 10 records it has seen in its input split. We also filter out movies that have not been rated by at least 100 people. The code is as follows:
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;
/**
 * Mapper that keeps the {@value #K} highest-rated movies seen in its input split and
 * emits them once, from {@link #cleanup}, as (title, rating) pairs.
 *
 * <p>Each input line is split on tabs and read as {@code title, rating, numberOfRaters}.
 * Movies with fewer than {@value #MIN_RATERS} raters are ignored. Lines that do not have
 * three fields, or whose rating / rater count is not numeric, are skipped and counted in
 * the {@code MovieRatingMapper / MALFORMED_RECORDS} job counter instead of failing the task.
 */
public class MovieRatingMapper extends
        Mapper<LongWritable, Text, Text, Text> {

    /** Maximum number of movies emitted by this mapper. */
    private static final int K = 10;

    /** Minimum number of raters a movie needs in order to be considered. */
    private static final int MIN_RATERS = 100;

    /** Number of tab-separated fields read from each line: title, rating, rater count. */
    private static final int EXPECTED_FIELDS = 3;

    private static final String COUNTER_GROUP = "MovieRatingMapper";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /**
     * Best movies seen so far, ordered by ascending rating (ties broken by title), so the
     * first entry is always the next one to evict. The value is the rating text exactly as
     * it appeared in the input, which is what gets emitted.
     */
    private final TreeMap<RatedMovie, String> movieMap = new TreeMap<>();

    /**
     * Parses one input line and, if the movie qualifies, adds it to the running top-K set.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one tab-separated line: title, rating, number of raters
     * @param context task context; used here only to count malformed records
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        if (fields.length < EXPECTED_FIELDS) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        String title = fields[0];
        String ratingText = fields[1];
        double rating;
        int raters;
        try {
            rating = Double.parseDouble(ratingText);
            raters = Integer.parseInt(fields[2].trim());
        } catch (NumberFormatException e) {
            // Header rows or corrupt lines should not abort the whole task.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }
        if (Double.isNaN(rating)) {
            // NaN has no meaningful position in a ranking.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        if (raters < MIN_RATERS) {
            return;
        }

        movieMap.put(new RatedMovie(rating, title), ratingText);
        if (movieMap.size() > K) {
            // Evict the lowest-rated movie so only the K best remain.
            movieMap.pollFirstEntry();
        }
    }

    /**
     * Emits the retained movies as (title, rating) pairs, lowest rating first.
     *
     * @param context task context the pairs are written to
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<RatedMovie, String> movieDetail : movieMap.entrySet()) {
            context.write(new Text(movieDetail.getKey().title), new Text(movieDetail.getValue()));
        }
    }

    /** Sort key for the top-K map: orders by rating, then by title so equal ratings do not collide. */
    private static final class RatedMovie implements Comparable<RatedMovie> {
        private final double rating;
        private final String title;

        RatedMovie(double rating, String title) {
            this.rating = rating;
            this.title = title;
        }

        @Override
        public int compareTo(RatedMovie other) {
            int byRating = Double.compare(rating, other.rating);
            return byRating != 0 ? byRating : title.compareTo(other.title);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof RatedMovie)) {
                return false;
            }
            return compareTo((RatedMovie) obj) == 0;
        }

        @Override
        public int hashCode() {
            return 31 * Double.hashCode(rating) + title.hashCode();
        }
    }
}