【发布时间】:2018-04-08 23:12:36
【问题描述】:
我尝试使用解决方案在Hadoop 中对我的reducer 的输出进行排序,如本问题所述:
MapReduce sort by value in descending order
这个和Java8有些冲突,我解决如下:
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.util.Map;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.LinkedHashMap;
import java.util.Collections;
import java.util.List;
import java.util.Comparator;
public class HourlyTweetsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable result = new IntWritable();
public Map<String , Integer> map = new LinkedHashMap<String , Integer>();
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable value : values) {
sum += value.get();
}
map.put(key.toString() , sum);
result.set(sum);
context.write(key, result);
}
public void cleanup(Context context){
//Cleanup is called once at the end to finish off anything for reducer
//Here we will write our final output
Map<String , Integer> sortedMap = new HashMap<String , Integer>();
sortedMap = sortMap(map);
for (Map.Entry<String,Integer> entry : sortedMap.entrySet()){
context.write(new Text(entry.getKey()),new IntWritable(entry.getValue()));
}
}
public Map<String , Integer > sortMap (Map<String,Integer> unsortMap){
Map<String ,Integer> hashmap = new HashMap<String,Integer>();
int count=0;
List<Map.Entry<String,Integer>> list = new LinkedList<Map.Entry<String,Integer>>(unsortMap.entrySet());
//Sorting the list we created from unsorted Map
Collections.sort(list , new Comparator<Map.Entry<String,Integer>>(){
public int compare (Map.Entry<String , Integer> o1 , Map.Entry<String , Integer> o2 ){
//sorting in descending order
return o2.getValue().compareTo(o1.getValue());
}
});
for(Map.Entry<String, Integer> entry : list){
// only writing top 3 in the sorted map
// if(count>2)
// break;
hashmap.put(entry.getKey(),entry.getValue());
}
return hashmap ;
}
}
问题是运行作业后输出没有排序:
11 1041557
14 1304166
17 1434978
2 733462
20 1288767
23 1677571
5 460629
8 497403
11 1041557
23 1677571
2 733462
14 1304166
5 460629
17 1434978
8 497403
20 1288767
我们如何解决它?
【问题讨论】:
-
“HourlyTweets.java 使用或覆盖了已弃用的 API”——这是您使用的库,还是您自己的程序?
-
这是我自己基于Hadoop的程序
-
它只是在错误消息中声明:
unreported exception IOException; must be caught or declared to be thrown in HourlyTweetsReducer.java:45 -
只需在报错所指出的
第 45 行上使用 try-catch 即可 -
你是用记事本开发的吗?一个体面的 IDE 甚至可能会为您提供如何操作的选项。
标签: java hadoop java-8 reducers