Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1115264
  • 博文数量: 143
  • 博客积分: 969
  • 博客等级: 准尉
  • 技术积分: 1765
  • 用 户 组: 普通用户
  • 注册时间: 2011-07-30 12:09
文章分类

全部博文(143)

文章存档

2023年(4)

2021年(2)

2020年(4)

2019年(4)

2018年(33)

2017年(6)

2016年(13)

2014年(7)

2013年(23)

2012年(33)

2011年(14)

我的朋友

分类: 大数据

2018-07-07 15:48:07

假设有如下数据:
100,200,300,400,500,600
200,100,300,400
300,100,200,400,500
400,100,200,300
500,100,300
600,100
其中第一列表示用户ID,其余列表示用户的好友ID,我们需要统计出这些用户的共同好友
其结果如下:
[100,200] [300, 400]  //表示用户100和200的共同好友是 300,400
[100,300] [200, 400, 500]
[100,400] [200, 300]
[100,500] [300]
[100,600] []
[200,300] [100, 400]
[200,400] [100, 300]
[300,400] [100, 200]
[300,500] [100]

实现原理如下:
1、在Mapper阶段,将用户ID和每个好友ID按从小到大的顺序组成新的Key,将该用户的好友列表作为value,发送到Reducer中,这样相同的Key会被发送到同一个reduce中
2、在Reducer阶段,对同一个Key下的这些value取交集,就可以得到共同好友列表,将这些好友列表输出即可。

具体实现如下:

点击(此处)折叠或打开

  1. import org.apache.hadoop.fs.FileSystem;
  2. import org.apache.hadoop.fs.Path;
  3. import org.apache.hadoop.io.LongWritable;
  4. import org.apache.hadoop.io.Text;
  5. import org.apache.hadoop.mapreduce.Job;
  6. import org.apache.hadoop.mapreduce.Mapper;
  7. import org.apache.hadoop.mapreduce.Reducer;
  8. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  9. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

  10. import java.io.IOException;
  11. import java.util.ArrayList;
  12. import java.util.HashMap;
  13. import java.util.List;
  14. import java.util.Map;

  15. public class CommonFriendsJob {
  16.     /*
  17.     * map input
  18.         100,200,300,400,500,600
  19.         200,100,300,400
  20.         300,100,200,400,500
  21.         400,100,200,300
  22.         500,100,300
  23.         600,100
  24.     * */
  25.     public static class CommonFriendMapper extends Mapper<LongWritable, Text, Text,Text>{
  26.         private static final Text outKey = new Text();
  27.         private static final Text outValue = new Text();
  28.         @Override
  29.         protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
  30.             String strValue = value.toString();
  31.             System.out.println("map input:"+ strValue);
  32.             String[] arrays = strValue.split(",");
  33.             String person = arrays[0];//第一个元素表示用户,其余的表示好友
  34.             String friends =strValue.substring(strValue.indexOf(",")+1);
  35.             for (int i=1;i<arrays.length;i++){
  36.                 String strOutKey = "";
  37.                 String friend = arrays[i];
  38.                 //避免两个值相同但顺序不同的情况下发送到不同的reduce中
  39.                if(Integer.parseInt(person)<Integer.parseInt(friend)){
  40.                    strOutKey="["+person+","+ friend+"]";
  41.                }else {
  42.                    strOutKey="["+ friend +","+ person+"]";
  43.                }
  44.                 outKey.set(strOutKey);
  45.                 outValue.set(friends);
  46.                 System.out.println("map out, key:"+strOutKey+" value:"+friends);
  47.                 context.write(outKey, outValue);
  48.             }
  49.         }
  50.     }

  51.     /*
  52.         reduce out:
  53.         [100,200]    [300, 400]
  54.         [100,300]    [200, 400, 500]
  55.         [100,400]    [200, 300]
  56.         [100,500]    [300]
  57.         [100,600]    []
  58.         [200,300]    [100, 400]
  59.         [200,400]    [100, 300]
  60.         [300,400]    [100, 200]
  61.         [300,500]    [100]
  62.       * */
  63.     public static class CommonFriendReducer extends Reducer<Text, Text, Text, Text>{
  64.         @Override
  65.         protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
  66.             int num=0;
  67.             Map<String, Integer> countMap = new HashMap<String,Integer>();
  68.             for (Text value: values){
  69.                 System.out.println("reduce input,key:"+key.toString() +"value:"+value.toString());
  70.                 String[] friends = value.toString().split(",");
  71.                 for (String friend:friends){//用来统计共同好友个数
  72.                     if(countMap.get(friend)==null){
  73.                         countMap.put(friend, 1);
  74.                     }else {
  75.                         countMap.put(friend,countMap.get(friend)+1);
  76.                     }
  77.                 }
  78.                 num++;
  79.             }

  80.             List<String> commonFriendList= new ArrayList<String>();
  81.             for (Map.Entry<String,Integer> entry: countMap.entrySet()){
  82.                 if(entry.getValue()==num && num!=1 ){//如果Key的个数与好友个数相同,那么是共同好友
  83.                     commonFriendList.add(entry.getKey());
  84.                 }
  85.             }

  86.             context.write(key, new Text(commonFriendList.toString()));
  87.         }
  88.     }


  89.     public static void main(String []args){

  90.         try {
  91.             Job job = Job.getInstance();
  92.             job.setJobName("CommonFriendsJob");
  93.             job.setJarByClass(CommonFriendsJob.class);

  94.             job.setMapperClass(CommonFriendMapper.class);
  95.             job.setMapOutputKeyClass(Text.class);
  96.             job.setMapOutputValueClass(Text.class);

  97.             job.setReducerClass(CommonFriendReducer.class);
  98.             job.setOutputKeyClass(Text.class);
  99.             job.setOutputValueClass(Text.class);

  100.             job.setNumReduceTasks(1);

  101.             FileInputFormat.setInputPaths(job, new Path(args[0]));
  102.             FileOutputFormat.setOutputPath(job,new Path(args[1]));

  103.             FileSystem.get(job.getConfiguration()).delete(new Path(args[1]),true);

  104.             System.out.println(job.waitForCompletion(true));
  105.         } catch (IOException e) {
  106.             e.printStackTrace();
  107.         } catch (InterruptedException e) {
  108.             e.printStackTrace();
  109.         } catch (ClassNotFoundException e) {
  110.             e.printStackTrace();
  111.         }

  112.     }


阅读(1252) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~